1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2009-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28
29 #include "qemu/osdep.h"
30 #include "hw/boards.h"
31 #include "net/net.h"
32 #include "migration.h"
33 #include "migration/snapshot.h"
34 #include "migration-stats.h"
35 #include "migration/vmstate.h"
36 #include "migration/misc.h"
37 #include "migration/register.h"
38 #include "migration/global_state.h"
39 #include "migration/channel-block.h"
40 #include "ram.h"
41 #include "qemu-file.h"
42 #include "savevm.h"
43 #include "postcopy-ram.h"
44 #include "qapi/error.h"
45 #include "qapi/qapi-commands-migration.h"
46 #include "qapi/clone-visitor.h"
47 #include "qapi/qapi-builtin-visit.h"
48 #include "qemu/error-report.h"
49 #include "sysemu/cpus.h"
50 #include "exec/memory.h"
51 #include "exec/target_page.h"
52 #include "trace.h"
53 #include "qemu/iov.h"
54 #include "qemu/job.h"
55 #include "qemu/main-loop.h"
56 #include "block/snapshot.h"
57 #include "qemu/cutils.h"
58 #include "io/channel-buffer.h"
59 #include "io/channel-file.h"
60 #include "sysemu/replay.h"
61 #include "sysemu/runstate.h"
62 #include "sysemu/sysemu.h"
63 #include "sysemu/xen.h"
64 #include "migration/colo.h"
65 #include "qemu/bitmap.h"
66 #include "net/announce.h"
67 #include "qemu/yank.h"
68 #include "yank_functions.h"
69 #include "sysemu/qtest.h"
70 #include "options.h"
71
72 const unsigned int postcopy_ram_discard_version;
73
74 /* Subcommands for QEMU_VM_COMMAND */
75 enum qemu_vm_cmd {
76 MIG_CMD_INVALID = 0, /* Must be 0 */
77 MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */
78 MIG_CMD_PING, /* Request a PONG on the RP */
79
80 MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just
81 warn we might want to do postcopy */
82 MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming
83 pages as it's running. */
84 MIG_CMD_POSTCOPY_RUN, /* Start execution */
85
86 MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
87 were previously sent during
88 precopy but are dirty. */
89 MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
90 MIG_CMD_ENABLE_COLO, /* Enable COLO */
91 MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
92 MIG_CMD_RECV_BITMAP, /* Request for received bitmap on dst */
93 MIG_CMD_MAX
94 };
95
96 #define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
97 static struct mig_cmd_args {
98 ssize_t len; /* -1 = variable */
99 const char *name;
100 } mig_cmd_args[] = {
101 [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
102 [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
103 [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
104 [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
105 [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
106 [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
107 [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
108 .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
109 [MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" },
110 [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
111 [MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
112 [MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
113 };
114
115 /* Note for MIG_CMD_POSTCOPY_ADVISE:
116 * The format of the arguments depends on the postcopy mode:
117 * - postcopy RAM only
118 * uint64_t host page size
119 * uint64_t target page size
120 *
121 * - postcopy RAM and postcopy dirty bitmaps
122 * format is the same as for postcopy RAM only
123 *
124 * - postcopy dirty bitmaps only
125 * Nothing. Command length field is 0.
126 *
127 * Be careful: adding a new postcopy entity with other parameters should
128 * not break the format's ability to describe itself. A good way is to
129 * introduce a generic, extensible format with an exception for the two old entities.
130 */
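
/*
 * Illustrative sketch (not part of the stream definition itself): with
 * postcopy RAM enabled, the ADVISE payload is the two be64 values noted
 * above, so the whole command element on the wire looks like:
 *
 *   byte  QEMU_VM_COMMAND
 *   be16  MIG_CMD_POSTCOPY_ADVISE
 *   be16  16                          (payload length)
 *   be64  ram_pagesize_summary()
 *   be64  qemu_target_page_size()
 *
 * With postcopy dirty bitmaps only, the payload length is 0 and the two
 * be64 fields are absent; see qemu_savevm_send_postcopy_advise() below.
 */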
131
132 /***********************************************************/
133 /* savevm/loadvm support */
134
135 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
136 {
137 if (is_writable) {
138 return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
139 } else {
140 return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
141 }
142 }
143
144
145 /* QEMUFile timer support.
146 * Not in qemu-file.c so as not to add qemu-timer.c as a dependency of qemu-file.c
147 */
148
149 void timer_put(QEMUFile *f, QEMUTimer *ts)
150 {
151 uint64_t expire_time;
152
153 expire_time = timer_expire_time_ns(ts);
154 qemu_put_be64(f, expire_time);
155 }
156
157 void timer_get(QEMUFile *f, QEMUTimer *ts)
158 {
159 uint64_t expire_time;
160
161 expire_time = qemu_get_be64(f);
162 if (expire_time != -1) {
163 timer_mod_ns(ts, expire_time);
164 } else {
165 timer_del(ts);
166 }
167 }
168
169
170 /* VMState timer support.
171 * Not in vmstate.c so as not to add qemu-timer.c as a dependency of vmstate.c
172 */
173
174 static int get_timer(QEMUFile *f, void *pv, size_t size,
175 const VMStateField *field)
176 {
177 QEMUTimer *v = pv;
178 timer_get(f, v);
179 return 0;
180 }
181
182 static int put_timer(QEMUFile *f, void *pv, size_t size,
183 const VMStateField *field, JSONWriter *vmdesc)
184 {
185 QEMUTimer *v = pv;
186 timer_put(f, v);
187
188 return 0;
189 }
190
191 const VMStateInfo vmstate_info_timer = {
192 .name = "timer",
193 .get = get_timer,
194 .put = put_timer,
195 };
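
/*
 * Illustrative use, assuming the VMSTATE_TIMER_PTR() wrapper from
 * migration/vmstate.h: a device with a "QEMUTimer *timer" field can
 * migrate it by referencing vmstate_info_timer from its vmsd, e.g.
 *
 *   static const VMStateDescription vmstate_mydev = {
 *       .name = "mydev",
 *       .version_id = 1,
 *       .minimum_version_id = 1,
 *       .fields = (const VMStateField[]) {
 *           VMSTATE_TIMER_PTR(timer, MyDevState),
 *           VMSTATE_END_OF_LIST()
 *       }
 *   };
 *
 * On save this stores the absolute expiry time (-1 when the timer is not
 * pending); on load the timer is re-armed or deleted accordingly.
 */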
196
197
198 typedef struct CompatEntry {
199 char idstr[256];
200 int instance_id;
201 } CompatEntry;
202
203 typedef struct SaveStateEntry {
204 QTAILQ_ENTRY(SaveStateEntry) entry;
205 char idstr[256];
206 uint32_t instance_id;
207 int alias_id;
208 int version_id;
209 /* version id read from the stream */
210 int load_version_id;
211 int section_id;
212 /* section id read from the stream */
213 int load_section_id;
214 const SaveVMHandlers *ops;
215 const VMStateDescription *vmsd;
216 void *opaque;
217 CompatEntry *compat;
218 int is_ram;
219 } SaveStateEntry;
220
221 typedef struct SaveState {
222 QTAILQ_HEAD(, SaveStateEntry) handlers;
223 SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
224 int global_section_id;
225 uint32_t len;
226 const char *name;
227 uint32_t target_page_bits;
228 uint32_t caps_count;
229 MigrationCapability *capabilities;
230 QemuUUID uuid;
231 } SaveState;
232
233 static SaveState savevm_state = {
234 .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
235 .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
236 .global_section_id = 0,
237 };
238
239 static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
240
241 static bool should_validate_capability(int capability)
242 {
243 assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
244 /* Validate only new capabilities to keep compatibility. */
245 switch (capability) {
246 case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
247 case MIGRATION_CAPABILITY_MAPPED_RAM:
248 return true;
249 default:
250 return false;
251 }
252 }
253
254 static uint32_t get_validatable_capabilities_count(void)
255 {
256 MigrationState *s = migrate_get_current();
257 uint32_t result = 0;
258 int i;
259 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
260 if (should_validate_capability(i) && s->capabilities[i]) {
261 result++;
262 }
263 }
264 return result;
265 }
266
267 static int configuration_pre_save(void *opaque)
268 {
269 SaveState *state = opaque;
270 const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
271 MigrationState *s = migrate_get_current();
272 int i, j;
273
274 state->len = strlen(current_name);
275 state->name = current_name;
276 state->target_page_bits = qemu_target_page_bits();
277
278 state->caps_count = get_validatable_capabilities_count();
279 state->capabilities = g_renew(MigrationCapability, state->capabilities,
280 state->caps_count);
281 for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
282 if (should_validate_capability(i) && s->capabilities[i]) {
283 state->capabilities[j++] = i;
284 }
285 }
286 state->uuid = qemu_uuid;
287
288 return 0;
289 }
290
291 static int configuration_post_save(void *opaque)
292 {
293 SaveState *state = opaque;
294
295 g_free(state->capabilities);
296 state->capabilities = NULL;
297 state->caps_count = 0;
298 return 0;
299 }
300
301 static int configuration_pre_load(void *opaque)
302 {
303 SaveState *state = opaque;
304
305 /* If there is no target-page-bits subsection it means the source
306 * predates the variable-target-page-bits support and is using the
307 * minimum possible value for this CPU.
308 */
309 state->target_page_bits = qemu_target_page_bits_min();
310 return 0;
311 }
312
313 static bool configuration_validate_capabilities(SaveState *state)
314 {
315 bool ret = true;
316 MigrationState *s = migrate_get_current();
317 unsigned long *source_caps_bm;
318 int i;
319
320 source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
321 for (i = 0; i < state->caps_count; i++) {
322 MigrationCapability capability = state->capabilities[i];
323 set_bit(capability, source_caps_bm);
324 }
325
326 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
327 bool source_state, target_state;
328 if (!should_validate_capability(i)) {
329 continue;
330 }
331 source_state = test_bit(i, source_caps_bm);
332 target_state = s->capabilities[i];
333 if (source_state != target_state) {
334 error_report("Capability %s is %s, but received capability is %s",
335 MigrationCapability_str(i),
336 target_state ? "on" : "off",
337 source_state ? "on" : "off");
338 ret = false;
339 /* Don't break here to report all failed capabilities */
340 }
341 }
342
343 g_free(source_caps_bm);
344 return ret;
345 }
346
347 static int configuration_post_load(void *opaque, int version_id)
348 {
349 SaveState *state = opaque;
350 const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
351 int ret = 0;
352
353 if (strncmp(state->name, current_name, state->len) != 0) {
354 error_report("Machine type received is '%.*s' and local is '%s'",
355 (int) state->len, state->name, current_name);
356 ret = -EINVAL;
357 goto out;
358 }
359
360 if (state->target_page_bits != qemu_target_page_bits()) {
361 error_report("Received TARGET_PAGE_BITS is %d but local is %d",
362 state->target_page_bits, qemu_target_page_bits());
363 ret = -EINVAL;
364 goto out;
365 }
366
367 if (!configuration_validate_capabilities(state)) {
368 ret = -EINVAL;
369 goto out;
370 }
371
372 out:
373 g_free((void *)state->name);
374 state->name = NULL;
375 state->len = 0;
376 g_free(state->capabilities);
377 state->capabilities = NULL;
378 state->caps_count = 0;
379
380 return ret;
381 }
382
383 static int get_capability(QEMUFile *f, void *pv, size_t size,
384 const VMStateField *field)
385 {
386 MigrationCapability *capability = pv;
387 char capability_str[UINT8_MAX + 1];
388 uint8_t len;
389 int i;
390
391 len = qemu_get_byte(f);
392 qemu_get_buffer(f, (uint8_t *)capability_str, len);
393 capability_str[len] = '\0';
394 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
395 if (!strcmp(MigrationCapability_str(i), capability_str)) {
396 *capability = i;
397 return 0;
398 }
399 }
400 error_report("Received unknown capability %s", capability_str);
401 return -EINVAL;
402 }
403
404 static int put_capability(QEMUFile *f, void *pv, size_t size,
405 const VMStateField *field, JSONWriter *vmdesc)
406 {
407 MigrationCapability *capability = pv;
408 const char *capability_str = MigrationCapability_str(*capability);
409 size_t len = strlen(capability_str);
410 assert(len <= UINT8_MAX);
411
412 qemu_put_byte(f, len);
413 qemu_put_buffer(f, (uint8_t *)capability_str, len);
414 return 0;
415 }
416
417 static const VMStateInfo vmstate_info_capability = {
418 .name = "capability",
419 .get = get_capability,
420 .put = put_capability,
421 };
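
/*
 * Illustrative wire encoding, derived from put_capability() above: each
 * capability is a length byte followed by the unterminated QAPI name,
 * e.g. "x-ignore-shared" becomes
 *
 *   byte      0x0f              (strlen("x-ignore-shared"))
 *   15 bytes  "x-ignore-shared"
 *
 * Matching by name rather than by enum value keeps the stream stable if
 * the MigrationCapability enum is ever reordered between QEMU versions.
 */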
422
423 /* The target-page-bits subsection is present only if the
424 * target page size is not the same as the default (i.e. the
425 * minimum page size for a variable-page-size guest CPU).
426 * If it is present then it contains the actual target page
427 * bits for the machine, and migration will fail if the
428 * two ends don't agree about it.
429 */
430 static bool vmstate_target_page_bits_needed(void *opaque)
431 {
432 return qemu_target_page_bits()
433 > qemu_target_page_bits_min();
434 }
435
436 static const VMStateDescription vmstate_target_page_bits = {
437 .name = "configuration/target-page-bits",
438 .version_id = 1,
439 .minimum_version_id = 1,
440 .needed = vmstate_target_page_bits_needed,
441 .fields = (const VMStateField[]) {
442 VMSTATE_UINT32(target_page_bits, SaveState),
443 VMSTATE_END_OF_LIST()
444 }
445 };
446
447 static bool vmstate_capabilites_needed(void *opaque)
448 {
449 return get_validatable_capabilities_count() > 0;
450 }
451
452 static const VMStateDescription vmstate_capabilites = {
453 .name = "configuration/capabilities",
454 .version_id = 1,
455 .minimum_version_id = 1,
456 .needed = vmstate_capabilites_needed,
457 .fields = (const VMStateField[]) {
458 VMSTATE_UINT32_V(caps_count, SaveState, 1),
459 VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
460 vmstate_info_capability,
461 MigrationCapability),
462 VMSTATE_END_OF_LIST()
463 }
464 };
465
466 static bool vmstate_uuid_needed(void *opaque)
467 {
468 return qemu_uuid_set && migrate_validate_uuid();
469 }
470
471 static int vmstate_uuid_post_load(void *opaque, int version_id)
472 {
473 SaveState *state = opaque;
474 char uuid_src[UUID_STR_LEN];
475 char uuid_dst[UUID_STR_LEN];
476
477 if (!qemu_uuid_set) {
478 /*
479 * It's only a warning because the user might not know the UUID in
480 * some cases, e.g. when loading an old snapshot
481 */
482 qemu_uuid_unparse(&state->uuid, uuid_src);
483 warn_report("UUID is received %s, but local uuid isn't set",
484 uuid_src);
485 return 0;
486 }
487 if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
488 qemu_uuid_unparse(&state->uuid, uuid_src);
489 qemu_uuid_unparse(&qemu_uuid, uuid_dst);
490 error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
491 return -EINVAL;
492 }
493 return 0;
494 }
495
496 static const VMStateDescription vmstate_uuid = {
497 .name = "configuration/uuid",
498 .version_id = 1,
499 .minimum_version_id = 1,
500 .needed = vmstate_uuid_needed,
501 .post_load = vmstate_uuid_post_load,
502 .fields = (const VMStateField[]) {
503 VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
504 VMSTATE_END_OF_LIST()
505 }
506 };
507
508 static const VMStateDescription vmstate_configuration = {
509 .name = "configuration",
510 .version_id = 1,
511 .pre_load = configuration_pre_load,
512 .post_load = configuration_post_load,
513 .pre_save = configuration_pre_save,
514 .post_save = configuration_post_save,
515 .fields = (const VMStateField[]) {
516 VMSTATE_UINT32(len, SaveState),
517 VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
518 VMSTATE_END_OF_LIST()
519 },
520 .subsections = (const VMStateDescription * const []) {
521 &vmstate_target_page_bits,
522 &vmstate_capabilites,
523 &vmstate_uuid,
524 NULL
525 }
526 };
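
/*
 * Illustrative layout of the resulting "configuration" section body (a
 * sketch; the machine name is a made-up example): a be32 length, the
 * machine type name with no NUL terminator, then whichever subsections
 * report themselves as .needed():
 *
 *   be32       len                      e.g. 10
 *   len bytes  name                     e.g. "pc-q35-9.0"
 *   [configuration/target-page-bits]    only for non-minimum page sizes
 *   [configuration/capabilities]        only if validatable caps are set
 *   [configuration/uuid]                only if a UUID was set
 */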
527
528 static void dump_vmstate_vmsd(FILE *out_file,
529 const VMStateDescription *vmsd, int indent,
530 bool is_subsection);
531
532 static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
533 int indent)
534 {
535 fprintf(out_file, "%*s{\n", indent, "");
536 indent += 2;
537 fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
538 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
539 field->version_id);
540 fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
541 field->field_exists ? "true" : "false");
542 if (field->flags & VMS_ARRAY) {
543 fprintf(out_file, "%*s\"num\": %d,\n", indent, "", field->num);
544 }
545 fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
546 if (field->vmsd != NULL) {
547 fprintf(out_file, ",\n");
548 dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
549 }
550 fprintf(out_file, "\n%*s}", indent - 2, "");
551 }
552
553 static void dump_vmstate_vmss(FILE *out_file,
554 const VMStateDescription *subsection,
555 int indent)
556 {
557 if (subsection != NULL) {
558 dump_vmstate_vmsd(out_file, subsection, indent, true);
559 }
560 }
561
562 static void dump_vmstate_vmsd(FILE *out_file,
563 const VMStateDescription *vmsd, int indent,
564 bool is_subsection)
565 {
566 if (is_subsection) {
567 fprintf(out_file, "%*s{\n", indent, "");
568 } else {
569 fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
570 }
571 indent += 2;
572 fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
573 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
574 vmsd->version_id);
575 fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
576 vmsd->minimum_version_id);
577 if (vmsd->fields != NULL) {
578 const VMStateField *field = vmsd->fields;
579 bool first;
580
581 fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
582 first = true;
583 while (field->name != NULL) {
584 if (field->flags & VMS_MUST_EXIST) {
585 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
586 field++;
587 continue;
588 }
589 if (!first) {
590 fprintf(out_file, ",\n");
591 }
592 dump_vmstate_vmsf(out_file, field, indent + 2);
593 field++;
594 first = false;
595 }
596 assert(field->flags == VMS_END);
597 fprintf(out_file, "\n%*s]", indent, "");
598 }
599 if (vmsd->subsections != NULL) {
600 const VMStateDescription * const *subsection = vmsd->subsections;
601 bool first;
602
603 fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
604 first = true;
605 while (*subsection != NULL) {
606 if (!first) {
607 fprintf(out_file, ",\n");
608 }
609 dump_vmstate_vmss(out_file, *subsection, indent + 2);
610 subsection++;
611 first = false;
612 }
613 fprintf(out_file, "\n%*s]", indent, "");
614 }
615 fprintf(out_file, "\n%*s}", indent - 2, "");
616 }
617
618 static void dump_machine_type(FILE *out_file)
619 {
620 MachineClass *mc;
621
622 mc = MACHINE_GET_CLASS(current_machine);
623
624 fprintf(out_file, " \"vmschkmachine\": {\n");
625 fprintf(out_file, " \"Name\": \"%s\"\n", mc->name);
626 fprintf(out_file, " },\n");
627 }
628
629 void dump_vmstate_json_to_file(FILE *out_file)
630 {
631 GSList *list, *elt;
632 bool first;
633
634 fprintf(out_file, "{\n");
635 dump_machine_type(out_file);
636
637 first = true;
638 list = object_class_get_list(TYPE_DEVICE, true);
639 for (elt = list; elt; elt = elt->next) {
640 DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
641 TYPE_DEVICE);
642 const char *name;
643 int indent = 2;
644
645 if (!dc->vmsd) {
646 continue;
647 }
648
649 if (!first) {
650 fprintf(out_file, ",\n");
651 }
652 name = object_class_get_name(OBJECT_CLASS(dc));
653 fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
654 indent += 2;
655 fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
656 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
657 dc->vmsd->version_id);
658 fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
659 dc->vmsd->minimum_version_id);
660
661 dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
662
663 fprintf(out_file, "\n%*s}", indent - 2, "");
664 first = false;
665 }
666 fprintf(out_file, "\n}\n");
667 fclose(out_file);
668 g_slist_free(list);
669 }
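
/*
 * Illustrative output shape (abridged, device name hypothetical), as
 * produced by the fprintf calls above:
 *
 * {
 *   "vmschkmachine": { "Name": "pc-q35-9.0" },
 *   "e1000": {
 *     "Name": "e1000",
 *     "version_id": 2,
 *     "minimum_version_id": 1,
 *     "Description": {
 *       "name": "e1000",
 *       "version_id": 2,
 *       "minimum_version_id": 1,
 *       "Fields": [ { "field": "...", "version_id": 0, "size": 4 } ]
 *     }
 *   }
 * }
 */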
670
671 static uint32_t calculate_new_instance_id(const char *idstr)
672 {
673 SaveStateEntry *se;
674 uint32_t instance_id = 0;
675
676 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
677 if (strcmp(idstr, se->idstr) == 0
678 && instance_id <= se->instance_id) {
679 instance_id = se->instance_id + 1;
680 }
681 }
682 /* Make sure we never wrap around without it being noticed */
683 assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
684 return instance_id;
685 }
686
687 static int calculate_compat_instance_id(const char *idstr)
688 {
689 SaveStateEntry *se;
690 int instance_id = 0;
691
692 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
693 if (!se->compat) {
694 continue;
695 }
696
697 if (strcmp(idstr, se->compat->idstr) == 0
698 && instance_id <= se->compat->instance_id) {
699 instance_id = se->compat->instance_id + 1;
700 }
701 }
702 return instance_id;
703 }
704
705 static inline MigrationPriority save_state_priority(SaveStateEntry *se)
706 {
707 if (se->vmsd) {
708 return se->vmsd->priority;
709 }
710 return MIG_PRI_DEFAULT;
711 }
712
713 static void savevm_state_handler_insert(SaveStateEntry *nse)
714 {
715 MigrationPriority priority = save_state_priority(nse);
716 SaveStateEntry *se;
717 int i;
718
719 assert(priority <= MIG_PRI_MAX);
720
721 /*
722 * This should never happen; otherwise migration will probably fail
723 * silently somewhere, because we could be wrongly applying one
724 * object's properties onto another one. Bail out ASAP.
725 */
726 if (find_se(nse->idstr, nse->instance_id)) {
727 error_report("%s: Detected duplicate SaveStateEntry: "
728 "id=%s, instance_id=0x%"PRIx32, __func__,
729 nse->idstr, nse->instance_id);
730 exit(EXIT_FAILURE);
731 }
732
733 for (i = priority - 1; i >= 0; i--) {
734 se = savevm_state.handler_pri_head[i];
735 if (se != NULL) {
736 assert(save_state_priority(se) < priority);
737 break;
738 }
739 }
740
741 if (i >= 0) {
742 QTAILQ_INSERT_BEFORE(se, nse, entry);
743 } else {
744 QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
745 }
746
747 if (savevm_state.handler_pri_head[priority] == NULL) {
748 savevm_state.handler_pri_head[priority] = nse;
749 }
750 }
751
752 static void savevm_state_handler_remove(SaveStateEntry *se)
753 {
754 SaveStateEntry *next;
755 MigrationPriority priority = save_state_priority(se);
756
757 if (se == savevm_state.handler_pri_head[priority]) {
758 next = QTAILQ_NEXT(se, entry);
759 if (next != NULL && save_state_priority(next) == priority) {
760 savevm_state.handler_pri_head[priority] = next;
761 } else {
762 savevm_state.handler_pri_head[priority] = NULL;
763 }
764 }
765 QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
766 }
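
/*
 * Illustrative invariant maintained by the insert/remove helpers above:
 * savevm_state.handlers stays sorted by descending priority, and
 * handler_pri_head[p] points at the first entry of priority p (or NULL
 * when no entry of that priority exists):
 *
 *   handlers:  [pri 3] [pri 3] [pri 1] [pri 0] [pri 0]
 *                 ^                ^      ^
 *   pri_head:   [3]              [1]    [0]         ([2] == NULL)
 *
 * Insertion scans downwards from priority - 1 for the next occupied head
 * and inserts before it, so entries of equal priority stay contiguous.
 */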
767
768 /* TODO: Individual devices generally have very little idea about the rest
769 of the system, so instance_id should be removed/replaced.
770 Meanwhile, pass -1 as the instance_id if you do not already have a clearly
771 distinguishing id for all instances of your device class. */
772 int register_savevm_live(const char *idstr,
773 uint32_t instance_id,
774 int version_id,
775 const SaveVMHandlers *ops,
776 void *opaque)
777 {
778 SaveStateEntry *se;
779
780 se = g_new0(SaveStateEntry, 1);
781 se->version_id = version_id;
782 se->section_id = savevm_state.global_section_id++;
783 se->ops = ops;
784 se->opaque = opaque;
785 se->vmsd = NULL;
786 /* if this is a live_savevm then set is_ram */
787 if (ops->save_setup != NULL) {
788 se->is_ram = 1;
789 }
790
791 pstrcat(se->idstr, sizeof(se->idstr), idstr);
792
793 if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
794 se->instance_id = calculate_new_instance_id(se->idstr);
795 } else {
796 se->instance_id = instance_id;
797 }
798 assert(!se->compat || se->instance_id == 0);
799 savevm_state_handler_insert(se);
800 return 0;
801 }
802
803 void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
804 {
805 SaveStateEntry *se, *new_se;
806 char id[256] = "";
807
808 if (obj) {
809 char *oid = vmstate_if_get_id(obj);
810 if (oid) {
811 pstrcpy(id, sizeof(id), oid);
812 pstrcat(id, sizeof(id), "/");
813 g_free(oid);
814 }
815 }
816 pstrcat(id, sizeof(id), idstr);
817
818 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
819 if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
820 savevm_state_handler_remove(se);
821 g_free(se->compat);
822 g_free(se);
823 }
824 }
825 }
826
827 /*
828 * Perform some basic checks on vmsd's at registration
829 * time.
830 */
831 static void vmstate_check(const VMStateDescription *vmsd)
832 {
833 const VMStateField *field = vmsd->fields;
834 const VMStateDescription * const *subsection = vmsd->subsections;
835
836 if (field) {
837 while (field->name) {
838 if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
839 /* Recurse to sub structures */
840 vmstate_check(field->vmsd);
841 }
842 /* Carry on */
843 field++;
844 }
845 /* Check for the end of field list canary */
846 if (field->flags != VMS_END) {
847 error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
848 g_assert_not_reached();
849 }
850 }
851
852 while (subsection && *subsection) {
853 /*
854 * The name of a subsection should start with the name of the
855 * current object.
856 */
857 assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
858 vmstate_check(*subsection);
859 subsection++;
860 }
861 }
862
863
864 int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
865 const VMStateDescription *vmsd,
866 void *opaque, int alias_id,
867 int required_for_version,
868 Error **errp)
869 {
870 SaveStateEntry *se;
871
872 /* If this triggers, alias support can be dropped for the vmsd. */
873 assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
874
875 se = g_new0(SaveStateEntry, 1);
876 se->version_id = vmsd->version_id;
877 se->section_id = savevm_state.global_section_id++;
878 se->opaque = opaque;
879 se->vmsd = vmsd;
880 se->alias_id = alias_id;
881
882 if (obj) {
883 char *id = vmstate_if_get_id(obj);
884 if (id) {
885 if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
886 sizeof(se->idstr)) {
887 error_setg(errp, "Path too long for VMState (%s)", id);
888 g_free(id);
889 g_free(se);
890
891 return -1;
892 }
893 g_free(id);
894
895 se->compat = g_new0(CompatEntry, 1);
896 pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
897 se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
898 calculate_compat_instance_id(vmsd->name) : instance_id;
899 instance_id = VMSTATE_INSTANCE_ID_ANY;
900 }
901 }
902 pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
903
904 if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
905 se->instance_id = calculate_new_instance_id(se->idstr);
906 } else {
907 se->instance_id = instance_id;
908 }
909
910 /* Perform a recursive sanity check during the test runs */
911 if (qtest_enabled()) {
912 vmstate_check(vmsd);
913 }
914 assert(!se->compat || se->instance_id == 0);
915 savevm_state_handler_insert(se);
916 return 0;
917 }
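
/*
 * Illustrative result (hypothetical device id): for a vmsd named "e1000"
 * on a device whose vmstate_if_get_id() returns "0000:00:03.0", the new
 * entry ends up with
 *
 *   se->idstr          = "0000:00:03.0/e1000"
 *   se->instance_id    = 0                 (first entry with this idstr)
 *   se->compat->idstr  = "e1000"           (matches streams from QEMUs
 *                                           that sent the bare vmsd name)
 *
 * Because the qdev id makes the idstr unique, instance ids are counted
 * per device path rather than per device class.
 */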
918
919 void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
920 void *opaque)
921 {
922 SaveStateEntry *se, *new_se;
923
924 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
925 if (se->vmsd == vmsd && se->opaque == opaque) {
926 savevm_state_handler_remove(se);
927 g_free(se->compat);
928 g_free(se);
929 }
930 }
931 }
932
933 static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
934 {
935 trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
936 if (!se->vmsd) { /* Old style */
937 return se->ops->load_state(f, se->opaque, se->load_version_id);
938 }
939 return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
940 }
941
942 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
943 JSONWriter *vmdesc)
944 {
945 uint64_t old_offset = qemu_file_transferred(f);
946 se->ops->save_state(f, se->opaque);
947 uint64_t size = qemu_file_transferred(f) - old_offset;
948
949 if (vmdesc) {
950 json_writer_int64(vmdesc, "size", size);
951 json_writer_start_array(vmdesc, "fields");
952 json_writer_start_object(vmdesc, NULL);
953 json_writer_str(vmdesc, "name", "data");
954 json_writer_int64(vmdesc, "size", size);
955 json_writer_str(vmdesc, "type", "buffer");
956 json_writer_end_object(vmdesc);
957 json_writer_end_array(vmdesc);
958 }
959 }
960
961 /*
962 * Write the header for a device section (QEMU_VM_SECTION START/END/PART/FULL)
963 */
964 static void save_section_header(QEMUFile *f, SaveStateEntry *se,
965 uint8_t section_type)
966 {
967 qemu_put_byte(f, section_type);
968 qemu_put_be32(f, se->section_id);
969
970 if (section_type == QEMU_VM_SECTION_FULL ||
971 section_type == QEMU_VM_SECTION_START) {
972 /* ID string */
973 size_t len = strlen(se->idstr);
974 qemu_put_byte(f, len);
975 qemu_put_buffer(f, (uint8_t *)se->idstr, len);
976
977 qemu_put_be32(f, se->instance_id);
978 qemu_put_be32(f, se->version_id);
979 }
980 }
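
/*
 * Illustrative FULL-section header as emitted above (values are
 * examples):
 *
 *   byte  QEMU_VM_SECTION_FULL
 *   be32  section_id              e.g. 0x2a
 *   byte  3                       strlen(idstr)
 *   bytes "ram"                   idstr
 *   be32  instance_id
 *   be32  version_id
 *
 * PART and END headers carry only the section type byte and section_id;
 * the destination resolves the rest from the earlier START/FULL header.
 */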
981
982 /*
983 * Write a footer onto device sections; it catches cases of misformatted
984 * device sections.
985 */
986 static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
987 {
988 if (migrate_get_current()->send_section_footer) {
989 qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
990 qemu_put_be32(f, se->section_id);
991 }
992 }
993
994 static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc,
995 Error **errp)
996 {
997 int ret;
998
999 if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1000 return 0;
1001 }
1002 if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
1003 trace_savevm_section_skip(se->idstr, se->section_id);
1004 return 0;
1005 }
1006
1007 trace_savevm_section_start(se->idstr, se->section_id);
1008 save_section_header(f, se, QEMU_VM_SECTION_FULL);
1009 if (vmdesc) {
1010 json_writer_start_object(vmdesc, NULL);
1011 json_writer_str(vmdesc, "name", se->idstr);
1012 json_writer_int64(vmdesc, "instance_id", se->instance_id);
1013 }
1014
1015 trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
1016 if (!se->vmsd) {
1017 vmstate_save_old_style(f, se, vmdesc);
1018 } else {
1019 ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc,
1020 errp);
1021 if (ret) {
1022 return ret;
1023 }
1024 }
1025
1026 trace_savevm_section_end(se->idstr, se->section_id, 0);
1027 save_section_footer(f, se);
1028 if (vmdesc) {
1029 json_writer_end_object(vmdesc);
1030 }
1031 return 0;
1032 }
1033 /**
1034 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
1035 * command and associated data.
1036 *
1037 * @f: File to send command on
1038 * @command: Command type to send
1039 * @len: Length of associated data
1040 * @data: Data associated with command.
1041 */
1042 static void qemu_savevm_command_send(QEMUFile *f,
1043 enum qemu_vm_cmd command,
1044 uint16_t len,
1045 uint8_t *data)
1046 {
1047 trace_savevm_command_send(command, len);
1048 qemu_put_byte(f, QEMU_VM_COMMAND);
1049 qemu_put_be16(f, (uint16_t)command);
1050 qemu_put_be16(f, len);
1051 qemu_put_buffer(f, data, len);
1052 qemu_fflush(f);
1053 }
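
/*
 * Illustrative wire image (a sketch): qemu_savevm_send_ping(f, 1) below
 * ends up emitting
 *
 *   byte  QEMU_VM_COMMAND
 *   be16  MIG_CMD_PING
 *   be16  4                (len; matches mig_cmd_args[MIG_CMD_PING])
 *   be32  1                (the ping value, echoed back in the PONG)
 *
 * The fixed lengths recorded in mig_cmd_args let the receiving side
 * sanity-check a command's length before dispatching on it.
 */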
1054
1055 void qemu_savevm_send_colo_enable(QEMUFile *f)
1056 {
1057 trace_savevm_send_colo_enable();
1058 qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
1059 }
1060
1061 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
1062 {
1063 uint32_t buf;
1064
1065 trace_savevm_send_ping(value);
1066 buf = cpu_to_be32(value);
1067 qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
1068 }
1069
1070 void qemu_savevm_send_open_return_path(QEMUFile *f)
1071 {
1072 trace_savevm_send_open_return_path();
1073 qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
1074 }
1075
1076 /* We have a buffer of data to send; we don't want that all to be loaded
1077 * by the command itself, so the command contains just the length of the
1078 * extra buffer that we then send straight after it.
1079 * TODO: There must be a better way to organise this.
1080 *
1081 * Returns:
1082 * 0 on success
1083 * -ve on error
1084 */
1085 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
1086 {
1087 uint32_t tmp;
1088 MigrationState *ms = migrate_get_current();
1089 Error *local_err = NULL;
1090
1091 if (len > MAX_VM_CMD_PACKAGED_SIZE) {
1092 error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
1093 __func__, len);
1094 migrate_set_error(ms, local_err);
1095 error_report_err(local_err);
1096 return -1;
1097 }
1098
1099 tmp = cpu_to_be32(len);
1100
1101 trace_qemu_savevm_send_packaged();
1102 qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
1103
1104 qemu_put_buffer(f, buf, len);
1105
1106 return 0;
1107 }
1108
1109 /* Send prior to any postcopy transfer */
1110 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
1111 {
1112 if (migrate_postcopy_ram()) {
1113 uint64_t tmp[2];
1114 tmp[0] = cpu_to_be64(ram_pagesize_summary());
1115 tmp[1] = cpu_to_be64(qemu_target_page_size());
1116
1117 trace_qemu_savevm_send_postcopy_advise();
1118 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
1119 16, (uint8_t *)tmp);
1120 } else {
1121 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
1122 }
1123 }
1124
1125 /* Sent prior to starting the destination running in postcopy; discards pages
1126 * that have already been sent but were redirtied on the source.
1127 * CMD_POSTCOPY_RAM_DISCARD consists of:
1128 * byte version (0)
1129 * byte Length of name field (not including 0)
1130 * n x byte RAM block name
1131 * byte 0 terminator (just for safety)
1132 * n x Byte ranges within the named RAMBlock
1133 * be64 Start of the range
1134 * be64 Length
1135 *
1136 * name: RAMBlock name that these entries are part of
1137 * len: Number of page entries
1138 * start_list: 'len' addresses
1139 * length_list: 'len' addresses
1140 *
1141 */
1142 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
1143 uint16_t len,
1144 uint64_t *start_list,
1145 uint64_t *length_list)
1146 {
1147 uint8_t *buf;
1148 uint16_t tmplen;
1149 uint16_t t;
1150 size_t name_len = strlen(name);
1151
1152 trace_qemu_savevm_send_postcopy_ram_discard(name, len);
1153 assert(name_len < 256);
1154 buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
1155 buf[0] = postcopy_ram_discard_version;
1156 buf[1] = name_len;
1157 memcpy(buf + 2, name, name_len);
1158 tmplen = 2 + name_len;
1159 buf[tmplen++] = '\0';
1160
1161 for (t = 0; t < len; t++) {
1162 stq_be_p(buf + tmplen, start_list[t]);
1163 tmplen += 8;
1164 stq_be_p(buf + tmplen, length_list[t]);
1165 tmplen += 8;
1166 }
1167 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
1168 g_free(buf);
1169 }
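
/*
 * Illustrative payload (a sketch with made-up offsets): discarding two
 * ranges of a "pc.ram" block sends MIG_CMD_POSTCOPY_RAM_DISCARD with
 *
 *   byte  0            version
 *   byte  6            strlen("pc.ram")
 *   bytes "pc.ram"
 *   byte  0            terminator
 *   be64  0x200000     start of range 0
 *   be64  0x1000       length of range 0
 *   be64  0x800000     start of range 1
 *   be64  0x2000       length of range 1
 *
 * i.e. tmplen = 2 + 6 + 1 + 2 * 16 = 41 bytes for this example.
 */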
1170
1171 /* Get the destination into a state where it can receive postcopy data. */
1172 void qemu_savevm_send_postcopy_listen(QEMUFile *f)
1173 {
1174 trace_savevm_send_postcopy_listen();
1175 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
1176 }
1177
1178 /* Kick the destination into running */
1179 void qemu_savevm_send_postcopy_run(QEMUFile *f)
1180 {
1181 trace_savevm_send_postcopy_run();
1182 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
1183 }
1184
1185 void qemu_savevm_send_postcopy_resume(QEMUFile *f)
1186 {
1187 trace_savevm_send_postcopy_resume();
1188 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
1189 }
1190
1191 void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
1192 {
1193 size_t len;
1194 char buf[256];
1195
1196 trace_savevm_send_recv_bitmap(block_name);
1197
1198 buf[0] = len = strlen(block_name);
1199 memcpy(buf + 1, block_name, len);
1200
1201 qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
1202 }
1203
1204 bool qemu_savevm_state_blocked(Error **errp)
1205 {
1206 SaveStateEntry *se;
1207
1208 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1209 if (se->vmsd && se->vmsd->unmigratable) {
1210 error_setg(errp, "State blocked by non-migratable device '%s'",
1211 se->idstr);
1212 return true;
1213 }
1214 }
1215 return false;
1216 }
1217
1218 void qemu_savevm_non_migratable_list(strList **reasons)
1219 {
1220 SaveStateEntry *se;
1221
1222 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1223 if (se->vmsd && se->vmsd->unmigratable) {
1224 QAPI_LIST_PREPEND(*reasons,
1225 g_strdup_printf("non-migratable device: %s",
1226 se->idstr));
1227 }
1228 }
1229 }
1230
1231 void qemu_savevm_state_header(QEMUFile *f)
1232 {
1233 MigrationState *s = migrate_get_current();
1234
1235 s->vmdesc = json_writer_new(false);
1236
1237 trace_savevm_state_header();
1238 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1239 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1240
1241 if (s->send_configuration) {
1242 qemu_put_byte(f, QEMU_VM_CONFIGURATION);
1243
1244 /*
1245 * This starts the main json object and is paired with the
1246 * json_writer_end_object in
1247 * qemu_savevm_state_complete_precopy_non_iterable
1248 */
1249 json_writer_start_object(s->vmdesc, NULL);
1250
1251 json_writer_start_object(s->vmdesc, "configuration");
1252 vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
1253 json_writer_end_object(s->vmdesc);
1254 }
1255 }
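
/*
 * Illustrative start of the resulting stream (a sketch; the magic
 * spelling "QEVM" is the usual reading of QEMU_VM_FILE_MAGIC):
 *
 *   be32  QEMU_VM_FILE_MAGIC       "QEVM"
 *   be32  QEMU_VM_FILE_VERSION
 *   byte  QEMU_VM_CONFIGURATION    only if s->send_configuration
 *   ...   vmstate_configuration section body
 *   ...   device sections, terminated by QEMU_VM_EOF
 */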
1256
1257 bool qemu_savevm_state_guest_unplug_pending(void)
1258 {
1259 SaveStateEntry *se;
1260
1261 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1262 if (se->vmsd && se->vmsd->dev_unplug_pending &&
1263 se->vmsd->dev_unplug_pending(se->opaque)) {
1264 return true;
1265 }
1266 }
1267
1268 return false;
1269 }
1270
1271 int qemu_savevm_state_prepare(Error **errp)
1272 {
1273 SaveStateEntry *se;
1274 int ret;
1275
1276 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1277 if (!se->ops || !se->ops->save_prepare) {
1278 continue;
1279 }
1280 if (se->ops->is_active) {
1281 if (!se->ops->is_active(se->opaque)) {
1282 continue;
1283 }
1284 }
1285
1286 ret = se->ops->save_prepare(se->opaque, errp);
1287 if (ret < 0) {
1288 return ret;
1289 }
1290 }
1291
1292 return 0;
1293 }
1294
1295 int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
1296 {
1297 ERRP_GUARD();
1298 MigrationState *ms = migrate_get_current();
1299 SaveStateEntry *se;
1300 int ret = 0;
1301
1302 json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
1303 json_writer_start_array(ms->vmdesc, "devices");
1304
1305 trace_savevm_state_setup();
1306 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1307 if (se->vmsd && se->vmsd->early_setup) {
1308 ret = vmstate_save(f, se, ms->vmdesc, errp);
1309 if (ret) {
1310 migrate_set_error(ms, *errp);
1311 qemu_file_set_error(f, ret);
1312 break;
1313 }
1314 continue;
1315 }
1316
1317 if (!se->ops || !se->ops->save_setup) {
1318 continue;
1319 }
1320 if (se->ops->is_active) {
1321 if (!se->ops->is_active(se->opaque)) {
1322 continue;
1323 }
1324 }
1325 save_section_header(f, se, QEMU_VM_SECTION_START);
1326
1327 ret = se->ops->save_setup(f, se->opaque, errp);
1328 save_section_footer(f, se);
1329 if (ret < 0) {
1330 qemu_file_set_error(f, ret);
1331 break;
1332 }
1333 }
1334
1335 if (ret) {
1336 return ret;
1337 }
1338
1339 /* TODO: Should we check that errp is set in case of failure? */
1340 return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
1341 }
1342
1343 int qemu_savevm_state_resume_prepare(MigrationState *s)
1344 {
1345 SaveStateEntry *se;
1346 int ret;
1347
1348 trace_savevm_state_resume_prepare();
1349
1350 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1351 if (!se->ops || !se->ops->resume_prepare) {
1352 continue;
1353 }
1354 if (se->ops->is_active) {
1355 if (!se->ops->is_active(se->opaque)) {
1356 continue;
1357 }
1358 }
1359 ret = se->ops->resume_prepare(s, se->opaque);
1360 if (ret < 0) {
1361 return ret;
1362 }
1363 }
1364
1365 return 0;
1366 }
1367
1368 /*
1369 * This function has three return values:
1370 * negative: there was an error, and we have -errno.
1371 * 0 : we haven't finished, the caller has to call us again
1372 * 1 : we have finished, we can go to the complete phase
1373 */
1374 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
1375 {
1376 SaveStateEntry *se;
1377 bool all_finished = true;
1378 int ret;
1379
1380 trace_savevm_state_iterate();
1381 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1382 if (!se->ops || !se->ops->save_live_iterate) {
1383 continue;
1384 }
1385 if (se->ops->is_active &&
1386 !se->ops->is_active(se->opaque)) {
1387 continue;
1388 }
1389 if (se->ops->is_active_iterate &&
1390 !se->ops->is_active_iterate(se->opaque)) {
1391 continue;
1392 }
1393 /*
1394 * In the postcopy phase, any device that doesn't know how to
1395 * do postcopy should have saved its state in the _complete
1396 * call that has already run; it might get confused if we call
1397 * iterate afterwards.
1398 */
1399 if (postcopy &&
1400 !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
1401 continue;
1402 }
1403 if (migration_rate_exceeded(f)) {
1404 return 0;
1405 }
1406 trace_savevm_section_start(se->idstr, se->section_id);
1407
1408 save_section_header(f, se, QEMU_VM_SECTION_PART);
1409
1410 ret = se->ops->save_live_iterate(f, se->opaque);
1411 trace_savevm_section_end(se->idstr, se->section_id, ret);
1412 save_section_footer(f, se);
1413
1414 if (ret < 0) {
1415 error_report("failed to save SaveStateEntry with id(name): "
1416 "%d(%s): %d",
1417 se->section_id, se->idstr, ret);
1418 qemu_file_set_error(f, ret);
1419 return ret;
1420 } else if (!ret) {
1421 all_finished = false;
1422 }
1423 }
1424 return all_finished;
1425 }
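
/*
 * Typical use of the tri-state return value; this is essentially the
 * loop qemu_savevm_state() runs later in this file:
 *
 *   while (qemu_file_get_error(f) == 0) {
 *       if (qemu_savevm_state_iterate(f, false) > 0) {
 *           break;   // every handler reported "finished"
 *       }
 *   }
 *   // ...then qemu_savevm_state_complete_precopy()
 */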
1426
1427 static bool should_send_vmdesc(void)
1428 {
1429 MachineState *machine = MACHINE(qdev_get_machine());
1430 bool in_postcopy = migration_in_postcopy();
1431 return !machine->suppress_vmdesc && !in_postcopy;
1432 }
1433
1434 /*
1435 * Calls the save_live_complete_postcopy methods
1436 * causing the last few pages to be sent immediately and doing any associated
1437 * cleanup.
1438 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
1439 * all the other devices, but that happens at the point we switch to postcopy.
1440 */
1441 void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1442 {
1443 SaveStateEntry *se;
1444 int ret;
1445
1446 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1447 if (!se->ops || !se->ops->save_live_complete_postcopy) {
1448 continue;
1449 }
1450 if (se->ops->is_active) {
1451 if (!se->ops->is_active(se->opaque)) {
1452 continue;
1453 }
1454 }
1455 trace_savevm_section_start(se->idstr, se->section_id);
1456 /* Section type */
1457 qemu_put_byte(f, QEMU_VM_SECTION_END);
1458 qemu_put_be32(f, se->section_id);
1459
1460 ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1461 trace_savevm_section_end(se->idstr, se->section_id, ret);
1462 save_section_footer(f, se);
1463 if (ret < 0) {
1464 qemu_file_set_error(f, ret);
1465 return;
1466 }
1467 }
1468
1469 qemu_put_byte(f, QEMU_VM_EOF);
1470 qemu_fflush(f);
1471 }
1472
1473 static
1474 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
1475 {
1476 int64_t start_ts_each, end_ts_each;
1477 SaveStateEntry *se;
1478 int ret;
1479
1480 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1481 if (!se->ops ||
1482 (in_postcopy && se->ops->has_postcopy &&
1483 se->ops->has_postcopy(se->opaque)) ||
1484 !se->ops->save_live_complete_precopy) {
1485 continue;
1486 }
1487
1488 if (se->ops->is_active) {
1489 if (!se->ops->is_active(se->opaque)) {
1490 continue;
1491 }
1492 }
1493
1494 start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
1495 trace_savevm_section_start(se->idstr, se->section_id);
1496
1497 save_section_header(f, se, QEMU_VM_SECTION_END);
1498
1499 ret = se->ops->save_live_complete_precopy(f, se->opaque);
1500 trace_savevm_section_end(se->idstr, se->section_id, ret);
1501 save_section_footer(f, se);
1502 if (ret < 0) {
1503 qemu_file_set_error(f, ret);
1504 return -1;
1505 }
1506 end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
1507 trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
1508 end_ts_each - start_ts_each);
1509 }
1510
1511 trace_vmstate_downtime_checkpoint("src-iterable-saved");
1512
1513 return 0;
1514 }
1515
1516 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
1517 bool in_postcopy,
1518 bool inactivate_disks)
1519 {
1520 MigrationState *ms = migrate_get_current();
1521 int64_t start_ts_each, end_ts_each;
1522 JSONWriter *vmdesc = ms->vmdesc;
1523 int vmdesc_len;
1524 SaveStateEntry *se;
1525 Error *local_err = NULL;
1526 int ret;
1527
1528 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1529 if (se->vmsd && se->vmsd->early_setup) {
1530 /* Already saved during qemu_savevm_state_setup(). */
1531 continue;
1532 }
1533
1534 start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
1535
1536 ret = vmstate_save(f, se, vmdesc, &local_err);
1537 if (ret) {
1538 migrate_set_error(ms, local_err);
1539 error_report_err(local_err);
1540 qemu_file_set_error(f, ret);
1541 return ret;
1542 }
1543
1544 end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
1545 trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
1546 end_ts_each - start_ts_each);
1547 }
1548
1549 if (inactivate_disks) {
1550 /* Inactivate before sending QEMU_VM_EOF so that the
1551 * bdrv_activate_all() on the other end won't fail. */
1552 ret = bdrv_inactivate_all();
1553 if (ret) {
1554 error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
1555 __func__, ret);
1556 migrate_set_error(ms, local_err);
1557 error_report_err(local_err);
1558 qemu_file_set_error(f, ret);
1559 return ret;
1560 }
1561 }
1562 if (!in_postcopy) {
1563 /* Postcopy stream will still be going */
1564 qemu_put_byte(f, QEMU_VM_EOF);
1565 }
1566
1567 json_writer_end_array(vmdesc);
1568 json_writer_end_object(vmdesc);
1569 vmdesc_len = strlen(json_writer_get(vmdesc));
1570
1571 if (should_send_vmdesc()) {
1572 qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1573 qemu_put_be32(f, vmdesc_len);
1574 qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
1575 }
1576
1577 /* Free it now to detect any inconsistencies. */
1578 json_writer_free(vmdesc);
1579 ms->vmdesc = NULL;
1580
1581 trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
1582
1583 return 0;
1584 }
1585
1586 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
1587 bool inactivate_disks)
1588 {
1589 int ret;
1590 Error *local_err = NULL;
1591 bool in_postcopy = migration_in_postcopy();
1592
1593 if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
1594 error_report_err(local_err);
1595 }
1596
1597 trace_savevm_state_complete_precopy();
1598
1599 cpu_synchronize_all_states();
1600
1601 if (!in_postcopy || iterable_only) {
1602 ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
1603 if (ret) {
1604 return ret;
1605 }
1606 }
1607
1608 if (iterable_only) {
1609 goto flush;
1610 }
1611
1612 ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
1613 inactivate_disks);
1614 if (ret) {
1615 return ret;
1616 }
1617
1618 flush:
1619 return qemu_fflush(f);
1620 }
1621
1622 /* Give an estimate of the amount left to be transferred;
1623 * the result is split into the amount for units that can and
1624 * for units that can't do postcopy.
1625 */
1626 void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
1627 uint64_t *can_postcopy)
1628 {
1629 SaveStateEntry *se;
1630
1631 *must_precopy = 0;
1632 *can_postcopy = 0;
1633
1634 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1635 if (!se->ops || !se->ops->state_pending_estimate) {
1636 continue;
1637 }
1638 if (se->ops->is_active) {
1639 if (!se->ops->is_active(se->opaque)) {
1640 continue;
1641 }
1642 }
1643 se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
1644 }
1645 }
1646
1647 void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
1648 uint64_t *can_postcopy)
1649 {
1650 SaveStateEntry *se;
1651
1652 *must_precopy = 0;
1653 *can_postcopy = 0;
1654
1655 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1656 if (!se->ops || !se->ops->state_pending_exact) {
1657 continue;
1658 }
1659 if (se->ops->is_active) {
1660 if (!se->ops->is_active(se->opaque)) {
1661 continue;
1662 }
1663 }
1664 se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
1665 }
1666 }
1667
1668 void qemu_savevm_state_cleanup(void)
1669 {
1670 SaveStateEntry *se;
1671 Error *local_err = NULL;
1672
1673 if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
1674 error_report_err(local_err);
1675 }
1676
1677 trace_savevm_state_cleanup();
1678 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1679 if (se->ops && se->ops->save_cleanup) {
1680 se->ops->save_cleanup(se->opaque);
1681 }
1682 }
1683 }
1684
1685 static int qemu_savevm_state(QEMUFile *f, Error **errp)
1686 {
1687 int ret;
1688 MigrationState *ms = migrate_get_current();
1689 MigrationStatus status;
1690
1691 if (migration_is_running()) {
1692 error_setg(errp, "There's a migration process in progress");
1693 return -EINVAL;
1694 }
1695
1696 ret = migrate_init(ms, errp);
1697 if (ret) {
1698 return ret;
1699 }
1700 ms->to_dst_file = f;
1701
1702 qemu_savevm_state_header(f);
1703 ret = qemu_savevm_state_setup(f, errp);
1704 if (ret) {
1705 goto cleanup;
1706 }
1707
1708 while (qemu_file_get_error(f) == 0) {
1709 if (qemu_savevm_state_iterate(f, false) > 0) {
1710 break;
1711 }
1712 }
1713
1714 ret = qemu_file_get_error(f);
1715 if (ret == 0) {
1716 qemu_savevm_state_complete_precopy(f, false, false);
1717 ret = qemu_file_get_error(f);
1718 }
1719 if (ret != 0) {
1720 error_setg_errno(errp, -ret, "Error while writing VM state");
1721 }
1722 cleanup:
1723 qemu_savevm_state_cleanup();
1724
1725 if (ret != 0) {
1726 status = MIGRATION_STATUS_FAILED;
1727 } else {
1728 status = MIGRATION_STATUS_COMPLETED;
1729 }
1730 migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
1731
1732 /* f is an outer parameter; it should not stay in the global migration
1733 * state after this function finishes */
1734 ms->to_dst_file = NULL;
1735
1736 return ret;
1737 }
1738
1739 void qemu_savevm_live_state(QEMUFile *f)
1740 {
1741 /* save QEMU_VM_SECTION_END section */
1742 qemu_savevm_state_complete_precopy(f, true, false);
1743 qemu_put_byte(f, QEMU_VM_EOF);
1744 }
1745
1746 int qemu_save_device_state(QEMUFile *f)
1747 {
1748 MigrationState *ms = migrate_get_current();
1749 Error *local_err = NULL;
1750 SaveStateEntry *se;
1751
1752 if (!migration_in_colo_state()) {
1753 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1754 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1755 }
1756 cpu_synchronize_all_states();
1757
1758 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1759 int ret;
1760
1761 if (se->is_ram) {
1762 continue;
1763 }
1764 ret = vmstate_save(f, se, NULL, &local_err);
1765 if (ret) {
1766 migrate_set_error(ms, local_err);
1767 error_report_err(local_err);
1768 return ret;
1769 }
1770 }
1771
1772 qemu_put_byte(f, QEMU_VM_EOF);
1773
1774 return qemu_file_get_error(f);
1775 }
1776
1777 static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
1778 {
1779 SaveStateEntry *se;
1780
1781 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1782 if (!strcmp(se->idstr, idstr) &&
1783 (instance_id == se->instance_id ||
1784 instance_id == se->alias_id))
1785 return se;
1786 /* Migrating from an older version? */
1787 if (strstr(se->idstr, idstr) && se->compat) {
1788 if (!strcmp(se->compat->idstr, idstr) &&
1789 (instance_id == se->compat->instance_id ||
1790 instance_id == se->alias_id))
1791 return se;
1792 }
1793 }
1794 return NULL;
1795 }
1796
1797 enum LoadVMExitCodes {
1798 /* Allow a command to quit all layers of nested loadvm loops */
1799 LOADVM_QUIT = 1,
1800 };
1801
1802 /* ------ incoming postcopy messages ------ */
1803 /* 'advise' arrives before any transfers just to tell us that a postcopy
1804 * *might* happen - it might be skipped if precopy transferred everything
1805 * quickly.
1806 */
1807 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
1808 uint16_t len)
1809 {
1810 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1811 uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
1812 size_t page_size = qemu_target_page_size();
1813 Error *local_err = NULL;
1814
1815 trace_loadvm_postcopy_handle_advise();
1816 if (ps != POSTCOPY_INCOMING_NONE) {
1817 error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1818 return -1;
1819 }
1820
1821 switch (len) {
1822 case 0:
1823 if (migrate_postcopy_ram()) {
1824 error_report("RAM postcopy is enabled but have 0 byte advise");
1825 return -EINVAL;
1826 }
1827 return 0;
1828 case 8 + 8:
1829 if (!migrate_postcopy_ram()) {
1830 error_report("RAM postcopy is disabled but have 16 byte advise");
1831 return -EINVAL;
1832 }
1833 break;
1834 default:
1835 error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
1836 return -EINVAL;
1837 }
1838
1839 if (!postcopy_ram_supported_by_host(mis, &local_err)) {
1840 error_report_err(local_err);
1841 postcopy_state_set(POSTCOPY_INCOMING_NONE);
1842 return -1;
1843 }
1844
1845 remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
1846 local_pagesize_summary = ram_pagesize_summary();
1847
1848 if (remote_pagesize_summary != local_pagesize_summary) {
1849 /*
1850 * This detects two potential causes of mismatch:
1851 * a) A mismatch in host page sizes
1852 * Some combinations of mismatch are probably possible but it gets
1853 * a bit more complicated. In particular we need to place whole
1854 * host pages on the dest at once, and we need to ensure that we
1855 * handle dirtying to make sure we never end up sending part of
1856 * a hostpage on its own.
1857 * b) The use of different huge page sizes on source/destination
1858 * a finer-grained test is performed during RAM block migration,
1859 * but this test here causes a nice early, clear failure, and
1860 * also fails when passed to an older qemu that doesn't
1861 * do huge pages.
1862 */
1863 error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
1864 " d=%" PRIx64 ")",
1865 remote_pagesize_summary, local_pagesize_summary);
1866 return -1;
1867 }
1868
1869 remote_tps = qemu_get_be64(mis->from_src_file);
1870 if (remote_tps != page_size) {
1871 /*
1872 * Again, some differences could be dealt with, but for now keep it
1873 * simple.
1874 */
1875 error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
1876 (int)remote_tps, page_size);
1877 return -1;
1878 }
1879
1880 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
1881 error_report_err(local_err);
1882 return -1;
1883 }
1884
1885 if (ram_postcopy_incoming_init(mis)) {
1886 return -1;
1887 }
1888
1889 return 0;
1890 }
1891
1892 /* After postcopy we will be told to throw some pages away since they're
1893 * dirty and will have to be demand fetched. Must happen before CPU is
1894 * started.
1895 * There can be 0..many of these messages, each encoding multiple pages.
1896 */
1897 static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1898 uint16_t len)
1899 {
1900 int tmp;
1901 char ramid[256];
1902 PostcopyState ps = postcopy_state_get();
1903
1904 trace_loadvm_postcopy_ram_handle_discard();
1905
1906 switch (ps) {
1907 case POSTCOPY_INCOMING_ADVISE:
1908 /* 1st discard */
1909 tmp = postcopy_ram_prepare_discard(mis);
1910 if (tmp) {
1911 return tmp;
1912 }
1913 break;
1914
1915 case POSTCOPY_INCOMING_DISCARD:
1916 /* Expected state */
1917 break;
1918
1919 default:
1920 error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1921 ps);
1922 return -1;
1923 }
1924 /* We're expecting:
1925 * a version byte (0)
1926 * a RAM ID string (length byte, name, 0 terminator)
1927 * then at least one 16-byte chunk
1928 */
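/*
 * Illustrative payload layout, matching the reads below:
 *
 *   u8         version     (must equal postcopy_ram_discard_version)
 *   u8         length of the RAM block ID
 *   n x u8     RAM block ID string
 *   u8         0           (terminator)
 *   repeated:  be64 start_addr, be64 block_length
 */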
1929 if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1930 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1931 return -1;
1932 }
1933
1934 tmp = qemu_get_byte(mis->from_src_file);
1935 if (tmp != postcopy_ram_discard_version) {
1936 error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1937 return -1;
1938 }
1939
1940 if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1941 error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1942 return -1;
1943 }
1944 tmp = qemu_get_byte(mis->from_src_file);
1945 if (tmp != 0) {
1946 error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1947 return -1;
1948 }
1949
1950 len -= 3 + strlen(ramid);
1951 if (len % 16) {
1952 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1953 return -1;
1954 }
1955 trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1956 while (len) {
1957 uint64_t start_addr, block_length;
1958 start_addr = qemu_get_be64(mis->from_src_file);
1959 block_length = qemu_get_be64(mis->from_src_file);
1960
1961 len -= 16;
1962 int ret = ram_discard_range(ramid, start_addr, block_length);
1963 if (ret) {
1964 return ret;
1965 }
1966 }
1967 trace_loadvm_postcopy_ram_handle_discard_end();
1968
1969 return 0;
1970 }
1971
1972 /*
1973 * Triggered by a postcopy_listen command; this thread takes over reading
1974 * the input stream, leaving the main thread free to carry on loading the rest
1975 * of the device state (from RAM).
1976 * (TODO: This could do with being in a postcopy file - but then again it's
1977 * just another input loop, not that postcopy specific)
1978 */
1979 static void *postcopy_ram_listen_thread(void *opaque)
1980 {
1981 MigrationIncomingState *mis = migration_incoming_get_current();
1982 QEMUFile *f = mis->from_src_file;
1983 int load_res;
1984 MigrationState *migr = migrate_get_current();
1985
1986 object_ref(OBJECT(migr));
1987
1988 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1989 MIGRATION_STATUS_POSTCOPY_ACTIVE);
1990 qemu_sem_post(&mis->thread_sync_sem);
1991 trace_postcopy_ram_listen_thread_start();
1992
1993 rcu_register_thread();
1994 /*
1995 * Because we're a thread and not a coroutine we can't yield
1996 * in qemu_file, and thus we must be blocking now.
1997 */
1998 qemu_file_set_blocking(f, true);
1999 load_res = qemu_loadvm_state_main(f, mis);
2000
2001 /*
2002 * This is tricky: mis->from_src_file can change after the call above
2003 * returns, when postcopy recovery has happened. In the future, we may
2004 * want a wrapper for the QEMUFile handle.
2005 */
2006 f = mis->from_src_file;
2007
2008 /* And non-blocking again so we don't block in any cleanup */
2009 qemu_file_set_blocking(f, false);
2010
2011 trace_postcopy_ram_listen_thread_exit();
2012 if (load_res < 0) {
2013 qemu_file_set_error(f, load_res);
2014 dirty_bitmap_mig_cancel_incoming();
2015 if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2016 !migrate_postcopy_ram() && migrate_dirty_bitmaps())
2017 {
2018 error_report("%s: loadvm failed during postcopy: %d. All states "
2019 "are migrated except dirty bitmaps. Some dirty "
2020 "bitmaps may be lost, and present migrated dirty "
2021 "bitmaps are correctly migrated and valid.",
2022 __func__, load_res);
2023 load_res = 0; /* prevent further exit() */
2024 } else {
2025 error_report("%s: loadvm failed: %d", __func__, load_res);
2026 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2027 MIGRATION_STATUS_FAILED);
2028 }
2029 }
2030 if (load_res >= 0) {
2031 /*
2032 * This looks good, but it's possible that the device loading in the
2033 * main thread hasn't finished yet, and so we might not be in 'RUN'
2034 * state yet; wait for the end of the main thread.
2035 */
2036 qemu_event_wait(&mis->main_thread_load_event);
2037 }
2038 postcopy_ram_incoming_cleanup(mis);
2039
2040 if (load_res < 0) {
2041 /*
2042 * If something went wrong then we have a bad state so exit;
2043 * depending how far we got it might be possible at this point
2044 * to leave the guest running and fire MCEs for pages that never
2045 * arrived as a desperate recovery step.
2046 */
2047 rcu_unregister_thread();
2048 exit(EXIT_FAILURE);
2049 }
2050
2051 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2052 MIGRATION_STATUS_COMPLETED);
2053 /*
2054 * If everything has worked fine, then the main thread has waited
2055 * for us to start, and we're the last use of the mis.
2056 * (If something broke then qemu will have to exit anyway since it's
2057 * got a bad migration state).
2058 */
2059 migration_incoming_state_destroy();
2060
2061 rcu_unregister_thread();
2062 mis->have_listen_thread = false;
2063 postcopy_state_set(POSTCOPY_INCOMING_END);
2064
2065 object_unref(OBJECT(migr));
2066
2067 return NULL;
2068 }
2069
2070 /* After this message we must be able to immediately receive postcopy data */
2071 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
2072 {
2073 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
2074 Error *local_err = NULL;
2075
2076 trace_loadvm_postcopy_handle_listen("enter");
2077
2078 if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
2079 error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
2080 return -1;
2081 }
2082 if (ps == POSTCOPY_INCOMING_ADVISE) {
2083 /*
2084 * A rare case, we entered listen without having to do any discards,
2085 * so do the setup that's normally done at the time of the 1st discard.
2086 */
2087 if (migrate_postcopy_ram()) {
2088 postcopy_ram_prepare_discard(mis);
2089 }
2090 }
2091
2092 trace_loadvm_postcopy_handle_listen("after discard");
2093
2094 /*
2095 * Sensitise RAM - it can now generate requests for blocks that don't
2096 * exist. However, at this point the CPU shouldn't be running, and IO
2097 * shouldn't be doing anything yet, so don't actually expect requests.
2098 */
2099 if (migrate_postcopy_ram()) {
2100 if (postcopy_ram_incoming_setup(mis)) {
2101 postcopy_ram_incoming_cleanup(mis);
2102 return -1;
2103 }
2104 }
2105
2106 trace_loadvm_postcopy_handle_listen("after uffd");
2107
2108 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
2109 error_report_err(local_err);
2110 return -1;
2111 }
2112
2113 mis->have_listen_thread = true;
2114 postcopy_thread_create(mis, &mis->listen_thread,
2115 MIGRATION_THREAD_DST_LISTEN,
2116 postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
2117 trace_loadvm_postcopy_handle_listen("return");
2118
2119 return 0;
2120 }
2121
2122 static void loadvm_postcopy_handle_run_bh(void *opaque)
2123 {
2124 Error *local_err = NULL;
2125 MigrationIncomingState *mis = opaque;
2126
2127 trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
2128
2129 /* TODO: we should move all of this lot into postcopy_ram.c or shared code
2130 * in migration.c
2131 */
2132 cpu_synchronize_all_post_init();
2133
2134 trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
2135
2136 qemu_announce_self(&mis->announce_timer, migrate_announce_params());
2137
2138 trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
2139
2140 /* Make sure all file formats throw away their mutable metadata.
2141 * If we get an error here, just don't restart the VM yet. */
2142 bdrv_activate_all(&local_err);
2143 if (local_err) {
2144 error_report_err(local_err);
2145 local_err = NULL;
2146 autostart = false;
2147 }
2148
2149 trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
2150
2151 dirty_bitmap_mig_before_vm_start();
2152
2153 if (autostart) {
2154 /* Hold onto your hats, starting the CPU */
2155 vm_start();
2156 } else {
2157 /* leave it paused and let management decide when to start the CPU */
2158 runstate_set(RUN_STATE_PAUSED);
2159 }
2160
2161 trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
2162 }
2163
2164 /* After all discards we can start running and asking for pages */
2165 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
2166 {
2167 PostcopyState ps = postcopy_state_get();
2168
2169 trace_loadvm_postcopy_handle_run();
2170 if (ps != POSTCOPY_INCOMING_LISTENING) {
2171 error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
2172 return -1;
2173 }
2174
2175 postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
2176 migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis);
2177
2178 /* We need to finish reading the stream from the package
2179 * and also stop reading anything more from the stream that loaded the
2180 * package (since it's now being read by the listener thread).
2181 * LOADVM_QUIT will quit all the layers of nested loadvm loops.
2182 */
2183 return LOADVM_QUIT;
2184 }
2185
2186 /* Must be called with page_request_mutex held */
2187 static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
2188 gpointer data)
2189 {
2190 MigrationIncomingState *mis = data;
2191 void *host_addr = (void *) key;
2192 ram_addr_t rb_offset;
2193 RAMBlock *rb;
2194 int ret;
2195
2196 rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
2197 if (!rb) {
2198 /*
2199 * This should _never_ happen. However, be nice to a migrating VM and
2200 * don't crash/assert. Post an error (note: intentionally not *_once,
2201 * because we do want to see all the illegal addresses; this can never
2202 * be triggered by the guest, so we're safe) and move on to the next entry.
2203 */
2204 error_report("%s: illegal host addr %p", __func__, host_addr);
2205 /* Try the next entry */
2206 return FALSE;
2207 }
2208
2209 ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
2210 if (ret) {
2211 /* Please refer to above comment. */
2212 error_report("%s: send rp message failed for addr %p",
2213 __func__, host_addr);
2214 return FALSE;
2215 }
2216
2217 trace_postcopy_page_req_sync(host_addr);
2218
2219 return FALSE;
2220 }
2221
2222 static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
2223 {
2224 WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
2225 g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
2226 }
2227 }
2228
2229 static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
2230 {
2231 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
2232 error_report("%s: illegal resume received", __func__);
2233 /* Don't fail the load just because of this. */
2234 return 0;
2235 }
2236
2237 /*
2238 * Reset the last_rb before we resend any page req to source again, since
2239 * the source should have it reset already.
2240 */
2241 mis->last_rb = NULL;
2242
2243 /*
2244 * This means the source VM is ready to resume the postcopy migration.
2245 */
2246 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2247 MIGRATION_STATUS_POSTCOPY_ACTIVE);
2248
2249 trace_loadvm_postcopy_handle_resume();
2250
2251 /* Tell source that "we are ready" */
2252 migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
2253
2254 /*
2255 * After a postcopy recovery, the source should have lost the postcopy
2256 * queue, or potentially the requested pages could have been lost during
2257 * the network down phase. Let's re-sync with the source VM by re-sending
2258 * all the pending pages that we eagerly need, so these threads won't get
2259 * blocked too long due to the recovery.
2260 *
2261 * Without this procedure, the faulted destination VM threads (waiting for
2262 * page requests right before the postcopy is interrupted) can keep hanging
2263 * until the pages are sent by the source during the background copying of
2264 * pages, or another thread faulted on the same address accidentally.
2265 */
2266 migrate_send_rp_req_pages_pending(mis);
2267
2268 /*
2269 * It's time to switch state and release the fault thread to continue
2270 * service page faults. Note that this should be explicitly after the
2271 * above call to migrate_send_rp_req_pages_pending(). In short:
2272 * migrate_send_rp_message_req_pages() is not thread safe, yet.
2273 */
2274 qemu_sem_post(&mis->postcopy_pause_sem_fault);
2275
2276 if (migrate_postcopy_preempt()) {
2277 /*
2278 * The preempt channel is created asynchronously; wait for it here
2279 * and make sure it has been created.
2280 */
2281 qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
2282 assert(mis->postcopy_qemufile_dst);
2283 /* Kick the fast ram load thread too */
2284 qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
2285 }
2286
2287 return 0;
2288 }
2289
2290 /**
2291 * Immediately following this command is a blob of data containing an embedded
2292 * chunk of migration stream; read it and load it.
2293 *
2294 * @mis: Incoming state
2295 * The length of the packaged data is read from the stream itself.
2296 *
2297 * Returns: Negative values on error
2298 *
2299 */
2300 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
2301 {
2302 int ret;
2303 size_t length;
2304 QIOChannelBuffer *bioc;
2305
2306 length = qemu_get_be32(mis->from_src_file);
2307 trace_loadvm_handle_cmd_packaged(length);
2308
2309 if (length > MAX_VM_CMD_PACKAGED_SIZE) {
2310 error_report("Unreasonably large packaged state: %zu", length);
2311 return -1;
2312 }
2313
2314 bioc = qio_channel_buffer_new(length);
2315 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
2316 ret = qemu_get_buffer(mis->from_src_file,
2317 bioc->data,
2318 length);
2319 if (ret != length) {
2320 object_unref(OBJECT(bioc));
2321 error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
2322 ret, length);
2323 return (ret < 0) ? ret : -EAGAIN;
2324 }
2325 bioc->usage += length;
2326 trace_loadvm_handle_cmd_packaged_received(ret);
2327
2328 QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
2329
2330 /*
2331 * Before loading the guest states, ensure that the preempt channel is
2332 * ready to use, as some of the states (e.g. via virtio_load) might
2333 * trigger page faults that will be handled through the preempt channel.
2334 * So yield to the main thread in the case that the channel create event
2335 * hasn't been dispatched.
2336 *
2337 * TODO: if we can move migration loadvm out of main thread, then we
2338 * won't block main thread from polling the accept() fds. We can drop
2339 * this as a whole when that is done.
2340 */
2341 do {
2342 if (!migrate_postcopy_preempt() || !qemu_in_coroutine() ||
2343 mis->postcopy_qemufile_dst) {
2344 break;
2345 }
2346
2347 aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
2348 qemu_coroutine_yield();
2349 } while (1);
2350
2351 ret = qemu_loadvm_state_main(packf, mis);
2352 trace_loadvm_handle_cmd_packaged_main(ret);
2353 qemu_fclose(packf);
2354 object_unref(OBJECT(bioc));
2355
2356 return ret;
2357 }
2358
2359 /*
2360 * Handle a request from the source for the received bitmap on the
2361 * destination. Payload format:
2362 *
2363 * len (1 byte) + ramblock_name (<255 bytes)
2364 */
2365 static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
2366 uint16_t len)
2367 {
2368 QEMUFile *file = mis->from_src_file;
2369 RAMBlock *rb;
2370 char block_name[256];
2371 size_t cnt;
2372
2373 cnt = qemu_get_counted_string(file, block_name);
2374 if (!cnt) {
2375 error_report("%s: failed to read block name", __func__);
2376 return -EINVAL;
2377 }
2378
2379 /* Validate before using the data */
2380 if (qemu_file_get_error(file)) {
2381 return qemu_file_get_error(file);
2382 }
2383
2384 if (len != cnt + 1) {
2385 error_report("%s: invalid payload length (%d)", __func__, len);
2386 return -EINVAL;
2387 }
2388
2389 rb = qemu_ram_block_by_name(block_name);
2390 if (!rb) {
2391 error_report("%s: block '%s' not found", __func__, block_name);
2392 return -EINVAL;
2393 }
2394
2395 migrate_send_rp_recv_bitmap(mis, block_name);
2396
2397 trace_loadvm_handle_recv_bitmap(block_name);
2398
2399 return 0;
2400 }
2401
2402 static int loadvm_process_enable_colo(MigrationIncomingState *mis)
2403 {
2404 int ret = migration_incoming_enable_colo();
2405
2406 if (!ret) {
2407 ret = colo_init_ram_cache();
2408 if (ret) {
2409 migration_incoming_disable_colo();
2410 }
2411 }
2412 return ret;
2413 }
2414
2415 /*
2416 * Process an incoming 'QEMU_VM_COMMAND'. Returns:
2417 * 0 just a normal return
2418 * LOADVM_QUIT All good, but exit the loop
2419 * <0 Error
2420 */
2421 static int loadvm_process_command(QEMUFile *f)
2422 {
2423 MigrationIncomingState *mis = migration_incoming_get_current();
2424 uint16_t cmd;
2425 uint16_t len;
2426 uint32_t tmp32;
2427
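/*
 * Every QEMU_VM_COMMAND starts with a be16 command id and a be16
 * payload length; the payload itself is consumed by the handlers below.
 */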
2428 cmd = qemu_get_be16(f);
2429 len = qemu_get_be16(f);
2430
2431 /* Check validity before continuing to process commands */
2432 if (qemu_file_get_error(f)) {
2433 return qemu_file_get_error(f);
2434 }
2435
2436 if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
2437 error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
2438 return -EINVAL;
2439 }
2440
2441 trace_loadvm_process_command(mig_cmd_args[cmd].name, len);
2442
2443 if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
2444 error_report("%s received with bad length - expecting %zu, got %d",
2445 mig_cmd_args[cmd].name,
2446 (size_t)mig_cmd_args[cmd].len, len);
2447 return -ERANGE;
2448 }
2449
2450 switch (cmd) {
2451 case MIG_CMD_OPEN_RETURN_PATH:
2452 if (mis->to_src_file) {
2453 error_report("CMD_OPEN_RETURN_PATH called when RP already open");
2454 /* Not really a problem, so don't give up */
2455 return 0;
2456 }
2457 mis->to_src_file = qemu_file_get_return_path(f);
2458 if (!mis->to_src_file) {
2459 error_report("CMD_OPEN_RETURN_PATH failed");
2460 return -1;
2461 }
2462
2463 /*
2464 * Switchover ack is enabled but no device uses it, so send an ACK to
2465 * the source that it's OK to switch over. Do it here, after the return
2466 * path has been created.
2467 */
2468 if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
2469 int ret = migrate_send_rp_switchover_ack(mis);
2470 if (ret) {
2471 error_report(
2472 "Could not send switchover ack RP MSG, err %d (%s)", ret,
2473 strerror(-ret));
2474 return ret;
2475 }
2476 }
2477 break;
2478
2479 case MIG_CMD_PING:
2480 tmp32 = qemu_get_be32(f);
2481 trace_loadvm_process_command_ping(tmp32);
2482 if (!mis->to_src_file) {
2483 error_report("CMD_PING (0x%x) received with no return path",
2484 tmp32);
2485 return -1;
2486 }
2487 migrate_send_rp_pong(mis, tmp32);
2488 break;
2489
2490 case MIG_CMD_PACKAGED:
2491 return loadvm_handle_cmd_packaged(mis);
2492
2493 case MIG_CMD_POSTCOPY_ADVISE:
2494 return loadvm_postcopy_handle_advise(mis, len);
2495
2496 case MIG_CMD_POSTCOPY_LISTEN:
2497 return loadvm_postcopy_handle_listen(mis);
2498
2499 case MIG_CMD_POSTCOPY_RUN:
2500 return loadvm_postcopy_handle_run(mis);
2501
2502 case MIG_CMD_POSTCOPY_RAM_DISCARD:
2503 return loadvm_postcopy_ram_handle_discard(mis, len);
2504
2505 case MIG_CMD_POSTCOPY_RESUME:
2506 return loadvm_postcopy_handle_resume(mis);
2507
2508 case MIG_CMD_RECV_BITMAP:
2509 return loadvm_handle_recv_bitmap(mis, len);
2510
2511 case MIG_CMD_ENABLE_COLO:
2512 return loadvm_process_enable_colo(mis);
2513 }
2514
2515 return 0;
2516 }
2517
2518 /*
2519 * Read a footer off the wire and check that it matches the expected section
2520 *
2521 * Returns: true if the footer was good
2522 * false if there is a problem (and calls error_report to say why)
2523 */
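/*
 * On the wire a footer is a QEMU_VM_SECTION_FOOTER byte followed by a
 * be32 section id, which must match the id of the section just loaded.
 */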
2524 static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
2525 {
2526 int ret;
2527 uint8_t read_mark;
2528 uint32_t read_section_id;
2529
2530 if (!migrate_get_current()->send_section_footer) {
2531 /* No footer to check */
2532 return true;
2533 }
2534
2535 read_mark = qemu_get_byte(f);
2536
2537 ret = qemu_file_get_error(f);
2538 if (ret) {
2539 error_report("%s: Read section footer failed: %d",
2540 __func__, ret);
2541 return false;
2542 }
2543
2544 if (read_mark != QEMU_VM_SECTION_FOOTER) {
2545 error_report("Missing section footer for %s", se->idstr);
2546 return false;
2547 }
2548
2549 read_section_id = qemu_get_be32(f);
2550 if (read_section_id != se->load_section_id) {
2551 error_report("Mismatched section id in footer for %s -"
2552 " read 0x%x expected 0x%x",
2553 se->idstr, read_section_id, se->load_section_id);
2554 return false;
2555 }
2556
2557 /* All good */
2558 return true;
2559 }
2560
2561 static int
2562 qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
2563 {
2564 bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
2565 uint32_t instance_id, version_id, section_id;
2566 int64_t start_ts, end_ts;
2567 SaveStateEntry *se;
2568 char idstr[256];
2569 int ret;
2570
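/*
 * A section start/full header, as read below, is: be32 section id,
 * counted id string, be32 instance id, be32 version id, followed by
 * the vmstate data itself.
 */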
2571 /* Read section start */
2572 section_id = qemu_get_be32(f);
2573 if (!qemu_get_counted_string(f, idstr)) {
2574 error_report("Unable to read ID string for section %u",
2575 section_id);
2576 return -EINVAL;
2577 }
2578 instance_id = qemu_get_be32(f);
2579 version_id = qemu_get_be32(f);
2580
2581 ret = qemu_file_get_error(f);
2582 if (ret) {
2583 error_report("%s: Failed to read instance/version ID: %d",
2584 __func__, ret);
2585 return ret;
2586 }
2587
2588 trace_qemu_loadvm_state_section_startfull(section_id, idstr,
2589 instance_id, version_id);
2590 /* Find savevm section */
2591 se = find_se(idstr, instance_id);
2592 if (se == NULL) {
2593 error_report("Unknown savevm section or instance '%s' %"PRIu32". "
2594 "Make sure that your current VM setup matches your "
2595 "saved VM setup, including any hotplugged devices",
2596 idstr, instance_id);
2597 return -EINVAL;
2598 }
2599
2600 /* Validate version */
2601 if (version_id > se->version_id) {
2602 error_report("savevm: unsupported version %d for '%s' v%d",
2603 version_id, idstr, se->version_id);
2604 return -EINVAL;
2605 }
2606 se->load_version_id = version_id;
2607 se->load_section_id = section_id;
2608
2609 /* Validate if it is a device's state */
2610 if (xen_enabled() && se->is_ram) {
2611 error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
2612 return -EINVAL;
2613 }
2614
2615 if (trace_downtime) {
2616 start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2617 }
2618
2619 ret = vmstate_load(f, se);
2620 if (ret < 0) {
2621 error_report("error while loading state for instance 0x%"PRIx32" of"
2622 " device '%s'", instance_id, idstr);
2623 return ret;
2624 }
2625
2626 if (trace_downtime) {
2627 end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2628 trace_vmstate_downtime_load("non-iterable", se->idstr,
2629 se->instance_id, end_ts - start_ts);
2630 }
2631
2632 if (!check_section_footer(f, se)) {
2633 return -EINVAL;
2634 }
2635
2636 return 0;
2637 }
2638
2639 static int
2640 qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
2641 {
2642 bool trace_downtime = (type == QEMU_VM_SECTION_END);
2643 int64_t start_ts, end_ts;
2644 uint32_t section_id;
2645 SaveStateEntry *se;
2646 int ret;
2647
2648 section_id = qemu_get_be32(f);
2649
2650 ret = qemu_file_get_error(f);
2651 if (ret) {
2652 error_report("%s: Failed to read section ID: %d",
2653 __func__, ret);
2654 return ret;
2655 }
2656
2657 trace_qemu_loadvm_state_section_partend(section_id);
2658 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2659 if (se->load_section_id == section_id) {
2660 break;
2661 }
2662 }
2663 if (se == NULL) {
2664 error_report("Unknown savevm section %d", section_id);
2665 return -EINVAL;
2666 }
2667
2668 if (trace_downtime) {
2669 start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2670 }
2671
2672 ret = vmstate_load(f, se);
2673 if (ret < 0) {
2674 error_report("error while loading state section id %d(%s)",
2675 section_id, se->idstr);
2676 return ret;
2677 }
2678
2679 if (trace_downtime) {
2680 end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2681 trace_vmstate_downtime_load("iterable", se->idstr,
2682 se->instance_id, end_ts - start_ts);
2683 }
2684
2685 if (!check_section_footer(f, se)) {
2686 return -EINVAL;
2687 }
2688
2689 return 0;
2690 }
2691
2692 static int qemu_loadvm_state_header(QEMUFile *f)
2693 {
2694 unsigned int v;
2695 int ret;
2696
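/*
 * The stream begins with be32 QEMU_VM_FILE_MAGIC and a be32 version,
 * optionally followed by a QEMU_VM_CONFIGURATION section.
 */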
2697 v = qemu_get_be32(f);
2698 if (v != QEMU_VM_FILE_MAGIC) {
2699 error_report("Not a migration stream");
2700 return -EINVAL;
2701 }
2702
2703 v = qemu_get_be32(f);
2704 if (v == QEMU_VM_FILE_VERSION_COMPAT) {
2705 error_report("SaveVM v2 format is obsolete and don't work anymore");
2706 return -ENOTSUP;
2707 }
2708 if (v != QEMU_VM_FILE_VERSION) {
2709 error_report("Unsupported migration stream version");
2710 return -ENOTSUP;
2711 }
2712
2713 if (migrate_get_current()->send_configuration) {
2714 if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
2715 error_report("Configuration section missing");
2716 return -EINVAL;
2717 }
2718 ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
2719
2720 if (ret) {
2721 return ret;
2722 }
2723 }
2724 return 0;
2725 }
2726
2727 static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
2728 {
2729 SaveStateEntry *se;
2730
2731 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2732 if (!se->ops || !se->ops->switchover_ack_needed) {
2733 continue;
2734 }
2735
2736 if (se->ops->switchover_ack_needed(se->opaque)) {
2737 mis->switchover_ack_pending_num++;
2738 }
2739 }
2740
2741 trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
2742 }
2743
2744 static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
2745 {
2746 ERRP_GUARD();
2747 SaveStateEntry *se;
2748 int ret;
2749
2750 trace_loadvm_state_setup();
2751 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2752 if (!se->ops || !se->ops->load_setup) {
2753 continue;
2754 }
2755 if (se->ops->is_active) {
2756 if (!se->ops->is_active(se->opaque)) {
2757 continue;
2758 }
2759 }
2760
2761 ret = se->ops->load_setup(f, se->opaque, errp);
2762 if (ret < 0) {
2763 error_prepend(errp, "Load state of device %s failed: ",
2764 se->idstr);
2765 qemu_file_set_error(f, ret);
2766 return ret;
2767 }
2768 }
2769 return 0;
2770 }
2771
2772 void qemu_loadvm_state_cleanup(void)
2773 {
2774 SaveStateEntry *se;
2775
2776 trace_loadvm_state_cleanup();
2777 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2778 if (se->ops && se->ops->load_cleanup) {
2779 se->ops->load_cleanup(se->opaque);
2780 }
2781 }
2782 }
2783
2784 /* Return true if we should continue the migration, or false. */
2785 static bool postcopy_pause_incoming(MigrationIncomingState *mis)
2786 {
2787 int i;
2788
2789 trace_postcopy_pause_incoming();
2790
2791 assert(migrate_postcopy_ram());
2792
2793 /*
2794 * Unregistering yank via either the from-src or to-src file would work,
2795 * since the ioc behind them is the same
2796 */
2797 migration_ioc_unregister_yank_from_file(mis->from_src_file);
2798
2799 assert(mis->from_src_file);
2800 qemu_file_shutdown(mis->from_src_file);
2801 qemu_fclose(mis->from_src_file);
2802 mis->from_src_file = NULL;
2803
2804 assert(mis->to_src_file);
2805 qemu_file_shutdown(mis->to_src_file);
2806 qemu_mutex_lock(&mis->rp_mutex);
2807 qemu_fclose(mis->to_src_file);
2808 mis->to_src_file = NULL;
2809 qemu_mutex_unlock(&mis->rp_mutex);
2810
2811 /*
2812 * NOTE: this must happen before resetting the PostcopyTmpPages below,
2813 * otherwise it's racy to reset those fields while the fast load thread
2814 * may be accessing them in parallel.
2815 */
2816 if (mis->postcopy_qemufile_dst) {
2817 qemu_file_shutdown(mis->postcopy_qemufile_dst);
2818 /* Take the mutex to make sure the fast ram load thread halted */
2819 qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
2820 migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
2821 qemu_fclose(mis->postcopy_qemufile_dst);
2822 mis->postcopy_qemufile_dst = NULL;
2823 qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
2824 }
2825
2826 /* Current state can be either ACTIVE or RECOVER */
2827 migrate_set_state(&mis->state, mis->state,
2828 MIGRATION_STATUS_POSTCOPY_PAUSED);
2829
2830 /* Notify the fault thread for the invalidated file handle */
2831 postcopy_fault_thread_notify(mis);
2832
2833 /*
2834 * If the network is interrupted, any temp pages we received will be
2835 * useless because we didn't mark them as "received" in receivedmap. After a
2836 * proper recovery later (which will sync src dirty bitmap with receivedmap
2837 * on dest) these cached small pages will be resent again.
2838 */
2839 for (i = 0; i < mis->postcopy_channels; i++) {
2840 postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
2841 }
2842
2843 error_report("Detected IO failure for postcopy. "
2844 "Migration paused.");
2845
2846 do {
2847 qemu_sem_wait(&mis->postcopy_pause_sem_dst);
2848 } while (postcopy_is_paused(mis->state));
2849
2850 trace_postcopy_pause_incoming_continued();
2851
2852 return true;
2853 }
2854
2855 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
2856 {
2857 uint8_t section_type;
2858 int ret = 0;
2859
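/*
 * Dispatch loop: read a section-type byte and hand it to the matching
 * loader until QEMU_VM_EOF, a LOADVM_QUIT command, or an error.
 */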
2860 retry:
2861 while (true) {
2862 section_type = qemu_get_byte(f);
2863
2864 ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
2865 if (ret) {
2866 break;
2867 }
2868
2869 trace_qemu_loadvm_state_section(section_type);
2870 switch (section_type) {
2871 case QEMU_VM_SECTION_START:
2872 case QEMU_VM_SECTION_FULL:
2873 ret = qemu_loadvm_section_start_full(f, section_type);
2874 if (ret < 0) {
2875 goto out;
2876 }
2877 break;
2878 case QEMU_VM_SECTION_PART:
2879 case QEMU_VM_SECTION_END:
2880 ret = qemu_loadvm_section_part_end(f, section_type);
2881 if (ret < 0) {
2882 goto out;
2883 }
2884 break;
2885 case QEMU_VM_COMMAND:
2886 ret = loadvm_process_command(f);
2887 trace_qemu_loadvm_state_section_command(ret);
2888 if ((ret < 0) || (ret == LOADVM_QUIT)) {
2889 goto out;
2890 }
2891 break;
2892 case QEMU_VM_EOF:
2893 /* This is the end of migration */
2894 goto out;
2895 default:
2896 error_report("Unknown savevm section type %d", section_type);
2897 ret = -EINVAL;
2898 goto out;
2899 }
2900 }
2901
2902 out:
2903 if (ret < 0) {
2904 qemu_file_set_error(f, ret);
2905
2906 /* Cancel bitmaps incoming regardless of recovery */
2907 dirty_bitmap_mig_cancel_incoming();
2908
2909 /*
2910 * If we are in an active postcopy, then we pause instead of
2911 * bailing out, to at least keep the VM's dirty data. Note
2912 * that the POSTCOPY_INCOMING_LISTENING stage is still not enough,
2913 * since during it we're still receiving device states and we
2914 * haven't yet started the VM on the destination.
2915 *
2916 * Only RAM postcopy supports recovery. Still, if RAM postcopy is
2917 * enabled, a canceled bitmaps postcopy will not affect RAM postcopy
2918 * recovery.
2919 */
2920 if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2921 migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
2922 /* Reset f to point to the newly created channel */
2923 f = mis->from_src_file;
2924 goto retry;
2925 }
2926 }
2927 return ret;
2928 }
2929
2930 int qemu_loadvm_state(QEMUFile *f)
2931 {
2932 MigrationIncomingState *mis = migration_incoming_get_current();
2933 Error *local_err = NULL;
2934 int ret;
2935
2936 if (qemu_savevm_state_blocked(&local_err)) {
2937 error_report_err(local_err);
2938 return -EINVAL;
2939 }
2940
2941 ret = qemu_loadvm_state_header(f);
2942 if (ret) {
2943 return ret;
2944 }
2945
2946 if (qemu_loadvm_state_setup(f, &local_err) != 0) {
2947 error_report_err(local_err);
2948 return -EINVAL;
2949 }
2950
2951 if (migrate_switchover_ack()) {
2952 qemu_loadvm_state_switchover_ack_needed(mis);
2953 }
2954
2955 cpu_synchronize_all_pre_loadvm();
2956
2957 ret = qemu_loadvm_state_main(f, mis);
2958 qemu_event_set(&mis->main_thread_load_event);
2959
2960 trace_qemu_loadvm_state_post_main(ret);
2961
2962 if (mis->have_listen_thread) {
2963 /*
2964 * Postcopy listen thread still going, don't synchronize the
2965 * cpus yet.
2966 */
2967 return ret;
2968 }
2969
2970 if (ret == 0) {
2971 ret = qemu_file_get_error(f);
2972 }
2973
2974 /*
2975 * Try to read in the VMDESC section as well, so that dumping tools that
2976 * intercept our migration stream have the chance to see it.
2977 */
2978
2979 /* We've got to be careful; if we don't read the data and just shut the fd
2980 * then the sender can get an error if we close while it's still sending.
2981 * We also mustn't read data that isn't there; some transports (RDMA)
2982 * will stall waiting for that data when the source has already closed.
2983 */
2984 if (ret == 0 && should_send_vmdesc()) {
2985 uint8_t *buf;
2986 uint32_t size;
2987 uint8_t section_type = qemu_get_byte(f);
2988
2989 if (section_type != QEMU_VM_VMDESCRIPTION) {
2990 error_report("Expected vmdescription section, but got %d",
2991 section_type);
2992 /*
2993 * It doesn't seem worth failing at this point since
2994 * we apparently have an otherwise valid VM state
2995 */
2996 } else {
2997 buf = g_malloc(0x1000);
2998 size = qemu_get_be32(f);
2999
3000 while (size > 0) {
3001 uint32_t read_chunk = MIN(size, 0x1000);
3002 qemu_get_buffer(f, buf, read_chunk);
3003 size -= read_chunk;
3004 }
3005 g_free(buf);
3006 }
3007 }
3008
3009 cpu_synchronize_all_post_init();
3010
3011 return ret;
3012 }
3013
3014 int qemu_load_device_state(QEMUFile *f)
3015 {
3016 MigrationIncomingState *mis = migration_incoming_get_current();
3017 int ret;
3018
3019 /* Load QEMU_VM_SECTION_FULL section */
3020 ret = qemu_loadvm_state_main(f, mis);
3021 if (ret < 0) {
3022 error_report("Failed to load device state: %d", ret);
3023 return ret;
3024 }
3025
3026 cpu_synchronize_all_post_init();
3027 return 0;
3028 }
3029
3030 int qemu_loadvm_approve_switchover(void)
3031 {
3032 MigrationIncomingState *mis = migration_incoming_get_current();
3033
3034 if (!mis->switchover_ack_pending_num) {
3035 return -EINVAL;
3036 }
3037
3038 mis->switchover_ack_pending_num--;
3039 trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
3040
3041 if (mis->switchover_ack_pending_num) {
3042 return 0;
3043 }
3044
3045 return migrate_send_rp_switchover_ack(mis);
3046 }
3047
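/*
 * Pause the VM, write the vmstate through the block layer, then create
 * disk snapshots under the same tag; returns true on success.
 */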
3048 bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
3049 bool has_devices, strList *devices, Error **errp)
3050 {
3051 BlockDriverState *bs;
3052 QEMUSnapshotInfo sn1, *sn = &sn1;
3053 int ret = -1, ret2;
3054 QEMUFile *f;
3055 RunState saved_state = runstate_get();
3056 uint64_t vm_state_size;
3057 g_autoptr(GDateTime) now = g_date_time_new_now_local();
3058
3059 GLOBAL_STATE_CODE();
3060
3061 if (migration_is_blocked(errp)) {
3062 return false;
3063 }
3064
3065 if (!replay_can_snapshot()) {
3066 error_setg(errp, "Record/replay does not allow making a snapshot "
3067 "right now. Try again later.");
3068 return false;
3069 }
3070
3071 if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3072 return false;
3073 }
3074
3075 /* Delete old snapshots of the same name */
3076 if (name) {
3077 if (overwrite) {
3078 if (bdrv_all_delete_snapshot(name, has_devices,
3079 devices, errp) < 0) {
3080 return false;
3081 }
3082 } else {
3083 ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
3084 if (ret2 < 0) {
3085 return false;
3086 }
3087 if (ret2 == 1) {
3088 error_setg(errp,
3089 "Snapshot '%s' already exists in one or more devices",
3090 name);
3091 return false;
3092 }
3093 }
3094 }
3095
3096 bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
3097 if (bs == NULL) {
3098 return false;
3099 }
3100
3101 global_state_store();
3102 vm_stop(RUN_STATE_SAVE_VM);
3103
3104 bdrv_drain_all_begin();
3105
3106 memset(sn, 0, sizeof(*sn));
3107
3108 /* fill auxiliary fields */
3109 sn->date_sec = g_date_time_to_unix(now);
3110 sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
3111 sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
3112 if (replay_mode != REPLAY_MODE_NONE) {
3113 sn->icount = replay_get_current_icount();
3114 } else {
3115 sn->icount = -1ULL;
3116 }
3117
3118 if (name) {
3119 pstrcpy(sn->name, sizeof(sn->name), name);
3120 } else {
3121 g_autofree char *autoname = g_date_time_format(now, "vm-%Y%m%d%H%M%S");
3122 pstrcpy(sn->name, sizeof(sn->name), autoname);
3123 }
3124
3125 /* save the VM state */
3126 f = qemu_fopen_bdrv(bs, 1);
3127 if (!f) {
3128 error_setg(errp, "Could not open VM state file");
3129 goto the_end;
3130 }
3131 ret = qemu_savevm_state(f, errp);
3132 vm_state_size = qemu_file_transferred(f);
3133 ret2 = qemu_fclose(f);
3134 if (ret < 0) {
3135 goto the_end;
3136 }
3137 if (ret2 < 0) {
3138 ret = ret2;
3139 goto the_end;
3140 }
3141
3142 ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
3143 has_devices, devices, errp);
3144 if (ret < 0) {
3145 bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
3146 goto the_end;
3147 }
3148
3149 ret = 0;
3150
3151 the_end:
3152 bdrv_drain_all_end();
3153
3154 vm_resume(saved_state);
3155 return ret == 0;
3156 }
3157
3158 void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
3159 Error **errp)
3160 {
3161 QEMUFile *f;
3162 QIOChannelFile *ioc;
3163 int saved_vm_running;
3164 int ret;
3165
3166 if (!has_live) {
3167 /* live defaults to true so old versions of the Xen tool stack can
3168 * have a successful live migration */
3169 live = true;
3170 }
3171
3172 saved_vm_running = runstate_is_running();
3173 vm_stop(RUN_STATE_SAVE_VM);
3174 global_state_store_running();
3175
3176 ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC,
3177 0660, errp);
3178 if (!ioc) {
3179 goto the_end;
3180 }
3181 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
3182 f = qemu_file_new_output(QIO_CHANNEL(ioc));
3183 object_unref(OBJECT(ioc));
3184 ret = qemu_save_device_state(f);
3185 if (ret < 0 || qemu_fclose(f) < 0) {
3186 error_setg(errp, "saving Xen device state failed");
3187 } else {
3188 /* libxl calls the QMP command "stop" before calling
3189 * "xen-save-devices-state" and in case of migration failure, libxl
3190 * would call "cont".
3191 * So call bdrv_inactivate_all (release locks) here to let the other
3192 * side of the migration take control of the images.
3193 */
3194 if (live && !saved_vm_running) {
3195 ret = bdrv_inactivate_all();
3196 if (ret) {
3197 error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
3198 __func__, ret);
3199 }
3200 }
3201 }
3202
3203 the_end:
3204 if (saved_vm_running) {
3205 vm_start();
3206 }
3207 }
3208
3209 void qmp_xen_load_devices_state(const char *filename, Error **errp)
3210 {
3211 QEMUFile *f;
3212 QIOChannelFile *ioc;
3213 int ret;
3214
3215 /* Guest must be paused before loading the device state; the RAM state
3216 * will already have been loaded by xc
3217 */
3218 if (runstate_is_running()) {
3219 error_setg(errp, "Cannot update device state while vm is running");
3220 return;
3221 }
3222 vm_stop(RUN_STATE_RESTORE_VM);
3223
3224 ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
3225 if (!ioc) {
3226 return;
3227 }
3228 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
3229 f = qemu_file_new_input(QIO_CHANNEL(ioc));
3230 object_unref(OBJECT(ioc));
3231
3232 ret = qemu_loadvm_state(f);
3233 qemu_fclose(f);
3234 if (ret < 0) {
3235 error_setg(errp, "loading Xen device state failed");
3236 }
3237 migration_incoming_state_destroy();
3238 }
3239
3240 bool load_snapshot(const char *name, const char *vmstate,
3241 bool has_devices, strList *devices, Error **errp)
3242 {
3243 BlockDriverState *bs_vm_state;
3244 QEMUSnapshotInfo sn;
3245 QEMUFile *f;
3246 int ret;
3247 MigrationIncomingState *mis = migration_incoming_get_current();
3248
3249 if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3250 return false;
3251 }
3252 ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
3253 if (ret < 0) {
3254 return false;
3255 }
3256 if (ret == 0) {
3257 error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
3258 name);
3259 return false;
3260 }
3261
3262 bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
3263 if (!bs_vm_state) {
3264 return false;
3265 }
3266
3267 /* Don't even try to load empty VM states */
3268 ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
3269 if (ret < 0) {
3270 error_setg(errp, "Snapshot can not be found");
3271 return false;
3272 } else if (sn.vm_state_size == 0) {
3273 error_setg(errp, "This is a disk-only snapshot. Revert to it "
3274 "offline using qemu-img");
3275 return false;
3276 }
3277
3278 /*
3279 * Flush the record/replay queue. The VM state is about to
3280 * change, so we don't need to preserve its consistency.
3281 */
3282 replay_flush_events();
3283
3284 /* Flush all IO requests so they don't interfere with the new state. */
3285 bdrv_drain_all_begin();
3286
3287 ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
3288 if (ret < 0) {
3289 goto err_drain;
3290 }
3291
3292 /* restore the VM state */
3293 f = qemu_fopen_bdrv(bs_vm_state, 0);
3294 if (!f) {
3295 error_setg(errp, "Could not open VM state file");
3296 goto err_drain;
3297 }
3298
3299 qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
3300 mis->from_src_file = f;
3301
3302 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
3303 ret = -EINVAL;
3304 goto err_drain;
3305 }
3306 ret = qemu_loadvm_state(f);
3307 migration_incoming_state_destroy();
3308
3309 bdrv_drain_all_end();
3310
3311 if (ret < 0) {
3312 error_setg(errp, "Error %d while loading VM state", ret);
3313 return false;
3314 }
3315
3316 return true;
3317
3318 err_drain:
3319 bdrv_drain_all_end();
3320 return false;
3321 }
3322
3323 void load_snapshot_resume(RunState state)
3324 {
3325 vm_resume(state);
3326 if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
3327 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, &error_abort);
3328 }
3329 }
3330
3331 bool delete_snapshot(const char *name, bool has_devices,
3332 strList *devices, Error **errp)
3333 {
3334 if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3335 return false;
3336 }
3337
3338 if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
3339 return false;
3340 }
3341
3342 return true;
3343 }
3344
3345 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
3346 {
3347 qemu_ram_set_idstr(mr->ram_block,
3348 memory_region_name(mr), dev);
3349 qemu_ram_set_migratable(mr->ram_block);
3350 }
3351
3352 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
3353 {
3354 qemu_ram_unset_idstr(mr->ram_block);
3355 qemu_ram_unset_migratable(mr->ram_block);
3356 }
3357
3358 void vmstate_register_ram_global(MemoryRegion *mr)
3359 {
3360 vmstate_register_ram(mr, NULL);
3361 }
3362
3363 bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
3364 {
3365 /* check needed if --only-migratable is specified */
3366 if (!only_migratable) {
3367 return true;
3368 }
3369
3370 return !(vmsd && vmsd->unmigratable);
3371 }
3372
3373 typedef struct SnapshotJob {
3374 Job common;
3375 char *tag;
3376 char *vmstate;
3377 strList *devices;
3378 Coroutine *co;
3379 Error **errp;
3380 bool ret;
3381 } SnapshotJob;
3382
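/*
 * The snapshot QMP commands run as jobs: each coroutine run handler
 * schedules a bottom half in the main loop and yields; the BH performs
 * the blocking snapshot operation, stores the result, and wakes the
 * coroutine.
 */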
3383 static void qmp_snapshot_job_free(SnapshotJob *s)
3384 {
3385 g_free(s->tag);
3386 g_free(s->vmstate);
3387 qapi_free_strList(s->devices);
3388 }
3389
3390
3391 static void snapshot_load_job_bh(void *opaque)
3392 {
3393 Job *job = opaque;
3394 SnapshotJob *s = container_of(job, SnapshotJob, common);
3395 RunState orig_state = runstate_get();
3396
3397 job_progress_set_remaining(&s->common, 1);
3398
3399 vm_stop(RUN_STATE_RESTORE_VM);
3400
3401 s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
3402 if (s->ret) {
3403 load_snapshot_resume(orig_state);
3404 }
3405
3406 job_progress_update(&s->common, 1);
3407
3408 qmp_snapshot_job_free(s);
3409 aio_co_wake(s->co);
3410 }
3411
3412 static void snapshot_save_job_bh(void *opaque)
3413 {
3414 Job *job = opaque;
3415 SnapshotJob *s = container_of(job, SnapshotJob, common);
3416
3417 job_progress_set_remaining(&s->common, 1);
3418 s->ret = save_snapshot(s->tag, false, s->vmstate,
3419 true, s->devices, s->errp);
3420 job_progress_update(&s->common, 1);
3421
3422 qmp_snapshot_job_free(s);
3423 aio_co_wake(s->co);
3424 }
3425
3426 static void snapshot_delete_job_bh(void *opaque)
3427 {
3428 Job *job = opaque;
3429 SnapshotJob *s = container_of(job, SnapshotJob, common);
3430
3431 job_progress_set_remaining(&s->common, 1);
3432 s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
3433 job_progress_update(&s->common, 1);
3434
3435 qmp_snapshot_job_free(s);
3436 aio_co_wake(s->co);
3437 }
3438
3439 static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
3440 {
3441 SnapshotJob *s = container_of(job, SnapshotJob, common);
3442 s->errp = errp;
3443 s->co = qemu_coroutine_self();
3444 aio_bh_schedule_oneshot(qemu_get_aio_context(),
3445 snapshot_save_job_bh, job);
3446 qemu_coroutine_yield();
3447 return s->ret ? 0 : -1;
3448 }
3449
3450 static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
3451 {
3452 SnapshotJob *s = container_of(job, SnapshotJob, common);
3453 s->errp = errp;
3454 s->co = qemu_coroutine_self();
3455 aio_bh_schedule_oneshot(qemu_get_aio_context(),
3456 snapshot_load_job_bh, job);
3457 qemu_coroutine_yield();
3458 return s->ret ? 0 : -1;
3459 }
3460
3461 static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
3462 {
3463 SnapshotJob *s = container_of(job, SnapshotJob, common);
3464 s->errp = errp;
3465 s->co = qemu_coroutine_self();
3466 aio_bh_schedule_oneshot(qemu_get_aio_context(),
3467 snapshot_delete_job_bh, job);
3468 qemu_coroutine_yield();
3469 return s->ret ? 0 : -1;
3470 }
3471
3472
3473 static const JobDriver snapshot_load_job_driver = {
3474 .instance_size = sizeof(SnapshotJob),
3475 .job_type = JOB_TYPE_SNAPSHOT_LOAD,
3476 .run = snapshot_load_job_run,
3477 };
3478
3479 static const JobDriver snapshot_save_job_driver = {
3480 .instance_size = sizeof(SnapshotJob),
3481 .job_type = JOB_TYPE_SNAPSHOT_SAVE,
3482 .run = snapshot_save_job_run,
3483 };
3484
3485 static const JobDriver snapshot_delete_job_driver = {
3486 .instance_size = sizeof(SnapshotJob),
3487 .job_type = JOB_TYPE_SNAPSHOT_DELETE,
3488 .run = snapshot_delete_job_run,
3489 };
3490
3491
3492 void qmp_snapshot_save(const char *job_id,
3493 const char *tag,
3494 const char *vmstate,
3495 strList *devices,
3496 Error **errp)
3497 {
3498 SnapshotJob *s;
3499
3500 s = job_create(job_id, &snapshot_save_job_driver, NULL,
3501 qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3502 NULL, NULL, errp);
3503 if (!s) {
3504 return;
3505 }
3506
3507 s->tag = g_strdup(tag);
3508 s->vmstate = g_strdup(vmstate);
3509 s->devices = QAPI_CLONE(strList, devices);
3510
3511 job_start(&s->common);
3512 }
3513
3514 void qmp_snapshot_load(const char *job_id,
3515 const char *tag,
3516 const char *vmstate,
3517 strList *devices,
3518 Error **errp)
3519 {
3520 SnapshotJob *s;
3521
3522 s = job_create(job_id, &snapshot_load_job_driver, NULL,
3523 qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3524 NULL, NULL, errp);
3525 if (!s) {
3526 return;
3527 }
3528
3529 s->tag = g_strdup(tag);
3530 s->vmstate = g_strdup(vmstate);
3531 s->devices = QAPI_CLONE(strList, devices);
3532
3533 job_start(&s->common);
3534 }
3535
3536 void qmp_snapshot_delete(const char *job_id,
3537 const char *tag,
3538 strList *devices,
3539 Error **errp)
3540 {
3541 SnapshotJob *s;
3542
3543 s = job_create(job_id, &snapshot_delete_job_driver, NULL,
3544 qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3545 NULL, NULL, errp);
3546 if (!s) {
3547 return;
3548 }
3549
3550 s->tag = g_strdup(tag);
3551 s->devices = QAPI_CLONE(strList, devices);
3552
3553 job_start(&s->common);
3554 }
3555