xref: /openbmc/qemu/migration/savevm.c (revision b23197f9)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "qemu-common.h"
31 #include "hw/boards.h"
32 #include "hw/hw.h"
33 #include "hw/qdev.h"
34 #include "net/net.h"
35 #include "monitor/monitor.h"
36 #include "sysemu/sysemu.h"
37 #include "qemu/timer.h"
38 #include "audio/audio.h"
39 #include "migration/migration.h"
40 #include "migration/postcopy-ram.h"
41 #include "qapi/qmp/qerror.h"
42 #include "qemu/error-report.h"
43 #include "qemu/sockets.h"
44 #include "qemu/queue.h"
45 #include "sysemu/cpus.h"
46 #include "exec/memory.h"
47 #include "qmp-commands.h"
48 #include "trace.h"
49 #include "qemu/bitops.h"
50 #include "qemu/iov.h"
51 #include "block/snapshot.h"
52 #include "block/qapi.h"
53 
54 
/* Ethertype used for RARP frames; defined here only if not already defined. */
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
#endif
/* Values written into the RARP header by announce_self_create(). */
#define ARP_HTYPE_ETH 0x0001
#define ARP_PTYPE_IP 0x0800
#define ARP_OP_REQUEST_REV 0x3

/* Version byte placed at the start of a POSTCOPY_RAM_DISCARD payload. */
const unsigned int postcopy_ram_discard_version = 0;

/* When true, save_section_footer() emits nothing. */
static bool skip_section_footers;

/*
 * Per-command metadata, indexed by MIG_CMD_* value:
 *   len  - payload length in bytes, or -1 when the length is variable
 *   name - printable command name
 */
static struct mig_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} mig_cmd_args[] = {
    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = 16, .name = "POSTCOPY_ADVISE" },
    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
};
81 
82 static int announce_self_create(uint8_t *buf,
83                                 uint8_t *mac_addr)
84 {
85     /* Ethernet header. */
86     memset(buf, 0xff, 6);         /* destination MAC addr */
87     memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
88     *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
89 
90     /* RARP header. */
91     *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
92     *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
93     *(buf + 18) = 6; /* hardware addr length (ethernet) */
94     *(buf + 19) = 4; /* protocol addr length (IPv4) */
95     *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
96     memcpy(buf + 22, mac_addr, 6); /* source hw addr */
97     memset(buf + 28, 0x00, 4);     /* source protocol addr */
98     memcpy(buf + 32, mac_addr, 6); /* target hw addr */
99     memset(buf + 38, 0x00, 4);     /* target protocol addr */
100 
101     /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
102     memset(buf + 42, 0x00, 18);
103 
104     return 60; /* len (FCS will be added by hardware) */
105 }
106 
107 static void qemu_announce_self_iter(NICState *nic, void *opaque)
108 {
109     uint8_t buf[60];
110     int len;
111 
112     trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
113     len = announce_self_create(buf, nic->conf->macaddr.a);
114 
115     qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
116 }
117 
118 
119 static void qemu_announce_self_once(void *opaque)
120 {
121     static int count = SELF_ANNOUNCE_ROUNDS;
122     QEMUTimer *timer = *(QEMUTimer **)opaque;
123 
124     qemu_foreach_nic(qemu_announce_self_iter, NULL);
125 
126     if (--count) {
127         /* delay 50ms, 150ms, 250ms, ... */
128         timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
129                   self_announce_delay(count));
130     } else {
131             timer_del(timer);
132             timer_free(timer);
133     }
134 }
135 
136 void qemu_announce_self(void)
137 {
138     static QEMUTimer *timer;
139     timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
140     qemu_announce_self_once(&timer);
141 }
142 
143 /***********************************************************/
144 /* savevm/loadvm support */
145 
146 static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
147                                    int64_t pos)
148 {
149     int ret;
150     QEMUIOVector qiov;
151 
152     qemu_iovec_init_external(&qiov, iov, iovcnt);
153     ret = bdrv_writev_vmstate(opaque, &qiov, pos);
154     if (ret < 0) {
155         return ret;
156     }
157 
158     return qiov.size;
159 }
160 
/*
 * QEMUFile put_buffer hook backed by a block device: write @size bytes
 * from @buf as vmstate at offset @pos.
 *
 * Returns @size on success, or the negative value returned by
 * bdrv_save_vmstate() on failure, so that write errors are not silently
 * reported as success.
 */
static ssize_t block_put_buffer(void *opaque, const uint8_t *buf,
                                int64_t pos, size_t size)
{
    int ret;

    ret = bdrv_save_vmstate(opaque, buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    return size;
}
167 
/*
 * QEMUFile get_buffer hook backed by a block device: read up to @size
 * bytes of vmstate at offset @pos into @buf.  Returns whatever
 * bdrv_load_vmstate() returns.
 */
static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size)
{
    ssize_t nread = bdrv_load_vmstate(opaque, buf, pos, size);

    return nread;
}
173 
/* QEMUFile close hook: flush the backing block device, return its result. */
static int bdrv_fclose(void *opaque)
{
    int rc = bdrv_flush(opaque);

    return rc;
}
178 
/* QEMUFile operations used when a block device is opened for reading. */
static const QEMUFileOps bdrv_read_ops = {
    .get_buffer = block_get_buffer,
    .close =      bdrv_fclose
};

/* QEMUFile operations used when a block device is opened for writing. */
static const QEMUFileOps bdrv_write_ops = {
    .put_buffer     = block_put_buffer,
    .writev_buffer  = block_writev_buffer,
    .close          = bdrv_fclose
};
189 
190 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
191 {
192     if (is_writable) {
193         return qemu_fopen_ops(bs, &bdrv_write_ops);
194     }
195     return qemu_fopen_ops(bs, &bdrv_read_ops);
196 }
197 
198 
199 /* QEMUFile timer support.
200  * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
201  */
202 
203 void timer_put(QEMUFile *f, QEMUTimer *ts)
204 {
205     uint64_t expire_time;
206 
207     expire_time = timer_expire_time_ns(ts);
208     qemu_put_be64(f, expire_time);
209 }
210 
211 void timer_get(QEMUFile *f, QEMUTimer *ts)
212 {
213     uint64_t expire_time;
214 
215     expire_time = qemu_get_be64(f);
216     if (expire_time != -1) {
217         timer_mod_ns(ts, expire_time);
218     } else {
219         timer_del(ts);
220     }
221 }
222 
223 
224 /* VMState timer support.
225  * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
226  */
227 
228 static int get_timer(QEMUFile *f, void *pv, size_t size)
229 {
230     QEMUTimer *v = pv;
231     timer_get(f, v);
232     return 0;
233 }
234 
235 static void put_timer(QEMUFile *f, void *pv, size_t size)
236 {
237     QEMUTimer *v = pv;
238     timer_put(f, v);
239 }
240 
/* VMState field type for QEMUTimer, built on timer_get()/timer_put(). */
const VMStateInfo vmstate_info_timer = {
    .name = "timer",
    .get  = get_timer,
    .put  = put_timer,
};
246 
247 
/*
 * Compatibility identity of a SaveStateEntry that was registered with a
 * device path: the bare name (without the path prefix) and the instance id
 * associated with that bare name.
 */
typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;
252 
/* One registered savevm handler; linked into savevm_state.handlers. */
typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    /* "<device path>/<name>" when a device path exists, else just the name */
    char idstr[256];
    int instance_id;
    /* set by vmstate_register_with_alias_id() */
    int alias_id;
    int version_id;
    /* taken from savevm_state.global_section_id at registration time */
    int section_id;
    /* handler callbacks; not set for entries registered with a vmsd */
    SaveVMHandlers *ops;
    /* NULL for entries registered through register_savevm_live() */
    const VMStateDescription *vmsd;
    void *opaque;
    /* allocated only when the entry was registered with a device path */
    CompatEntry *compat;
    /* set to 1 when ops->save_live_setup is provided */
    int is_ram;
} SaveStateEntry;
266 
/* Global savevm bookkeeping. */
typedef struct SaveState {
    /* all registered SaveStateEntry objects, in registration order */
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    /* next section id to hand out at registration */
    int global_section_id;
    /* set by savevm_skip_configuration() */
    bool skip_configuration;
    /* length and bytes of the machine type name in the configuration section */
    uint32_t len;
    const char *name;
} SaveState;

/* The single global instance, starting with an empty handler list. */
static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .global_section_id = 0,
    .skip_configuration = false,
};
280 
281 void savevm_skip_configuration(void)
282 {
283     savevm_state.skip_configuration = true;
284 }
285 
286 
287 static void configuration_pre_save(void *opaque)
288 {
289     SaveState *state = opaque;
290     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
291 
292     state->len = strlen(current_name);
293     state->name = current_name;
294 }
295 
296 static int configuration_post_load(void *opaque, int version_id)
297 {
298     SaveState *state = opaque;
299     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
300 
301     if (strncmp(state->name, current_name, state->len) != 0) {
302         error_report("Machine type received is '%.*s' and local is '%s'",
303                      (int) state->len, state->name, current_name);
304         return -EINVAL;
305     }
306     return 0;
307 }
308 
/*
 * VMState description of the "configuration" section: a 32-bit length
 * followed by a buffer of that many bytes holding the machine type name.
 */
static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
        VMSTATE_END_OF_LIST()
    },
};
320 
321 static void dump_vmstate_vmsd(FILE *out_file,
322                               const VMStateDescription *vmsd, int indent,
323                               bool is_subsection);
324 
325 static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
326                               int indent)
327 {
328     fprintf(out_file, "%*s{\n", indent, "");
329     indent += 2;
330     fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
331     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
332             field->version_id);
333     fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
334             field->field_exists ? "true" : "false");
335     fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
336     if (field->vmsd != NULL) {
337         fprintf(out_file, ",\n");
338         dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
339     }
340     fprintf(out_file, "\n%*s}", indent - 2, "");
341 }
342 
343 static void dump_vmstate_vmss(FILE *out_file,
344                               const VMStateDescription **subsection,
345                               int indent)
346 {
347     if (*subsection != NULL) {
348         dump_vmstate_vmsd(out_file, *subsection, indent, true);
349     }
350 }
351 
352 static void dump_vmstate_vmsd(FILE *out_file,
353                               const VMStateDescription *vmsd, int indent,
354                               bool is_subsection)
355 {
356     if (is_subsection) {
357         fprintf(out_file, "%*s{\n", indent, "");
358     } else {
359         fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
360     }
361     indent += 2;
362     fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
363     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
364             vmsd->version_id);
365     fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
366             vmsd->minimum_version_id);
367     if (vmsd->fields != NULL) {
368         const VMStateField *field = vmsd->fields;
369         bool first;
370 
371         fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
372         first = true;
373         while (field->name != NULL) {
374             if (field->flags & VMS_MUST_EXIST) {
375                 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
376                 field++;
377                 continue;
378             }
379             if (!first) {
380                 fprintf(out_file, ",\n");
381             }
382             dump_vmstate_vmsf(out_file, field, indent + 2);
383             field++;
384             first = false;
385         }
386         fprintf(out_file, "\n%*s]", indent, "");
387     }
388     if (vmsd->subsections != NULL) {
389         const VMStateDescription **subsection = vmsd->subsections;
390         bool first;
391 
392         fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
393         first = true;
394         while (*subsection != NULL) {
395             if (!first) {
396                 fprintf(out_file, ",\n");
397             }
398             dump_vmstate_vmss(out_file, subsection, indent + 2);
399             subsection++;
400             first = false;
401         }
402         fprintf(out_file, "\n%*s]", indent, "");
403     }
404     fprintf(out_file, "\n%*s}", indent - 2, "");
405 }
406 
407 static void dump_machine_type(FILE *out_file)
408 {
409     MachineClass *mc;
410 
411     mc = MACHINE_GET_CLASS(current_machine);
412 
413     fprintf(out_file, "  \"vmschkmachine\": {\n");
414     fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
415     fprintf(out_file, "  },\n");
416 }
417 
418 void dump_vmstate_json_to_file(FILE *out_file)
419 {
420     GSList *list, *elt;
421     bool first;
422 
423     fprintf(out_file, "{\n");
424     dump_machine_type(out_file);
425 
426     first = true;
427     list = object_class_get_list(TYPE_DEVICE, true);
428     for (elt = list; elt; elt = elt->next) {
429         DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
430                                              TYPE_DEVICE);
431         const char *name;
432         int indent = 2;
433 
434         if (!dc->vmsd) {
435             continue;
436         }
437 
438         if (!first) {
439             fprintf(out_file, ",\n");
440         }
441         name = object_class_get_name(OBJECT_CLASS(dc));
442         fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
443         indent += 2;
444         fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
445         fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
446                 dc->vmsd->version_id);
447         fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
448                 dc->vmsd->minimum_version_id);
449 
450         dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
451 
452         fprintf(out_file, "\n%*s}", indent - 2, "");
453         first = false;
454     }
455     fprintf(out_file, "\n}\n");
456     fclose(out_file);
457 }
458 
459 static int calculate_new_instance_id(const char *idstr)
460 {
461     SaveStateEntry *se;
462     int instance_id = 0;
463 
464     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
465         if (strcmp(idstr, se->idstr) == 0
466             && instance_id <= se->instance_id) {
467             instance_id = se->instance_id + 1;
468         }
469     }
470     return instance_id;
471 }
472 
473 static int calculate_compat_instance_id(const char *idstr)
474 {
475     SaveStateEntry *se;
476     int instance_id = 0;
477 
478     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
479         if (!se->compat) {
480             continue;
481         }
482 
483         if (strcmp(idstr, se->compat->idstr) == 0
484             && instance_id <= se->compat->instance_id) {
485             instance_id = se->compat->instance_id + 1;
486         }
487     }
488     return instance_id;
489 }
490 
491 /* TODO: Individual devices generally have very little idea about the rest
492    of the system, so instance_id should be removed/replaced.
493    Meanwhile pass -1 as instance_id if you do not already have a clearly
494    distinguishing id for all instances of your device class. */
495 int register_savevm_live(DeviceState *dev,
496                          const char *idstr,
497                          int instance_id,
498                          int version_id,
499                          SaveVMHandlers *ops,
500                          void *opaque)
501 {
502     SaveStateEntry *se;
503 
504     se = g_new0(SaveStateEntry, 1);
505     se->version_id = version_id;
506     se->section_id = savevm_state.global_section_id++;
507     se->ops = ops;
508     se->opaque = opaque;
509     se->vmsd = NULL;
510     /* if this is a live_savem then set is_ram */
511     if (ops->save_live_setup != NULL) {
512         se->is_ram = 1;
513     }
514 
515     if (dev) {
516         char *id = qdev_get_dev_path(dev);
517         if (id) {
518             pstrcpy(se->idstr, sizeof(se->idstr), id);
519             pstrcat(se->idstr, sizeof(se->idstr), "/");
520             g_free(id);
521 
522             se->compat = g_new0(CompatEntry, 1);
523             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
524             se->compat->instance_id = instance_id == -1 ?
525                          calculate_compat_instance_id(idstr) : instance_id;
526             instance_id = -1;
527         }
528     }
529     pstrcat(se->idstr, sizeof(se->idstr), idstr);
530 
531     if (instance_id == -1) {
532         se->instance_id = calculate_new_instance_id(se->idstr);
533     } else {
534         se->instance_id = instance_id;
535     }
536     assert(!se->compat || se->instance_id == 0);
537     /* add at the end of list */
538     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
539     return 0;
540 }
541 
542 int register_savevm(DeviceState *dev,
543                     const char *idstr,
544                     int instance_id,
545                     int version_id,
546                     SaveStateHandler *save_state,
547                     LoadStateHandler *load_state,
548                     void *opaque)
549 {
550     SaveVMHandlers *ops = g_new0(SaveVMHandlers, 1);
551     ops->save_state = save_state;
552     ops->load_state = load_state;
553     return register_savevm_live(dev, idstr, instance_id, version_id,
554                                 ops, opaque);
555 }
556 
557 void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
558 {
559     SaveStateEntry *se, *new_se;
560     char id[256] = "";
561 
562     if (dev) {
563         char *path = qdev_get_dev_path(dev);
564         if (path) {
565             pstrcpy(id, sizeof(id), path);
566             pstrcat(id, sizeof(id), "/");
567             g_free(path);
568         }
569     }
570     pstrcat(id, sizeof(id), idstr);
571 
572     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
573         if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
574             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
575             g_free(se->compat);
576             g_free(se->ops);
577             g_free(se);
578         }
579     }
580 }
581 
582 int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
583                                    const VMStateDescription *vmsd,
584                                    void *opaque, int alias_id,
585                                    int required_for_version)
586 {
587     SaveStateEntry *se;
588 
589     /* If this triggers, alias support can be dropped for the vmsd. */
590     assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
591 
592     se = g_new0(SaveStateEntry, 1);
593     se->version_id = vmsd->version_id;
594     se->section_id = savevm_state.global_section_id++;
595     se->opaque = opaque;
596     se->vmsd = vmsd;
597     se->alias_id = alias_id;
598 
599     if (dev) {
600         char *id = qdev_get_dev_path(dev);
601         if (id) {
602             pstrcpy(se->idstr, sizeof(se->idstr), id);
603             pstrcat(se->idstr, sizeof(se->idstr), "/");
604             g_free(id);
605 
606             se->compat = g_new0(CompatEntry, 1);
607             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
608             se->compat->instance_id = instance_id == -1 ?
609                          calculate_compat_instance_id(vmsd->name) : instance_id;
610             instance_id = -1;
611         }
612     }
613     pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
614 
615     if (instance_id == -1) {
616         se->instance_id = calculate_new_instance_id(se->idstr);
617     } else {
618         se->instance_id = instance_id;
619     }
620     assert(!se->compat || se->instance_id == 0);
621     /* add at the end of list */
622     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
623     return 0;
624 }
625 
626 void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
627                         void *opaque)
628 {
629     SaveStateEntry *se, *new_se;
630 
631     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
632         if (se->vmsd == vmsd && se->opaque == opaque) {
633             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
634             g_free(se->compat);
635             g_free(se);
636         }
637     }
638 }
639 
640 static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
641 {
642     trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
643     if (!se->vmsd) {         /* Old style */
644         return se->ops->load_state(f, se->opaque, version_id);
645     }
646     return vmstate_load_state(f, se->vmsd, se->opaque, version_id);
647 }
648 
649 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
650 {
651     int64_t old_offset, size;
652 
653     old_offset = qemu_ftell_fast(f);
654     se->ops->save_state(f, se->opaque);
655     size = qemu_ftell_fast(f) - old_offset;
656 
657     if (vmdesc) {
658         json_prop_int(vmdesc, "size", size);
659         json_start_array(vmdesc, "fields");
660         json_start_object(vmdesc, NULL);
661         json_prop_str(vmdesc, "name", "data");
662         json_prop_int(vmdesc, "size", size);
663         json_prop_str(vmdesc, "type", "buffer");
664         json_end_object(vmdesc);
665         json_end_array(vmdesc);
666     }
667 }
668 
669 static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
670 {
671     trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
672     if (!se->vmsd) {
673         vmstate_save_old_style(f, se, vmdesc);
674         return;
675     }
676     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
677 }
678 
679 void savevm_skip_section_footers(void)
680 {
681     skip_section_footers = true;
682 }
683 
684 /*
685  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
686  */
687 static void save_section_header(QEMUFile *f, SaveStateEntry *se,
688                                 uint8_t section_type)
689 {
690     qemu_put_byte(f, section_type);
691     qemu_put_be32(f, se->section_id);
692 
693     if (section_type == QEMU_VM_SECTION_FULL ||
694         section_type == QEMU_VM_SECTION_START) {
695         /* ID string */
696         size_t len = strlen(se->idstr);
697         qemu_put_byte(f, len);
698         qemu_put_buffer(f, (uint8_t *)se->idstr, len);
699 
700         qemu_put_be32(f, se->instance_id);
701         qemu_put_be32(f, se->version_id);
702     }
703 }
704 
705 /*
706  * Write a footer onto device sections that catches cases misformatted device
707  * sections.
708  */
709 static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
710 {
711     if (!skip_section_footers) {
712         qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
713         qemu_put_be32(f, se->section_id);
714     }
715 }
716 
717 /**
718  * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
719  *                           command and associated data.
720  *
721  * @f: File to send command on
722  * @command: Command type to send
723  * @len: Length of associated data
724  * @data: Data associated with command.
725  */
726 void qemu_savevm_command_send(QEMUFile *f,
727                               enum qemu_vm_cmd command,
728                               uint16_t len,
729                               uint8_t *data)
730 {
731     trace_savevm_command_send(command, len);
732     qemu_put_byte(f, QEMU_VM_COMMAND);
733     qemu_put_be16(f, (uint16_t)command);
734     qemu_put_be16(f, len);
735     qemu_put_buffer(f, data, len);
736     qemu_fflush(f);
737 }
738 
739 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
740 {
741     uint32_t buf;
742 
743     trace_savevm_send_ping(value);
744     buf = cpu_to_be32(value);
745     qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
746 }
747 
748 void qemu_savevm_send_open_return_path(QEMUFile *f)
749 {
750     trace_savevm_send_open_return_path();
751     qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
752 }
753 
754 /* We have a buffer of data to send; we don't want that all to be loaded
755  * by the command itself, so the command contains just the length of the
756  * extra buffer that we then send straight after it.
757  * TODO: Must be a better way to organise that
758  *
759  * Returns:
760  *    0 on success
761  *    -ve on error
762  */
763 int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
764 {
765     size_t cur_iov;
766     size_t len = qsb_get_length(qsb);
767     uint32_t tmp;
768 
769     if (len > MAX_VM_CMD_PACKAGED_SIZE) {
770         error_report("%s: Unreasonably large packaged state: %zu",
771                      __func__, len);
772         return -1;
773     }
774 
775     tmp = cpu_to_be32(len);
776 
777     trace_qemu_savevm_send_packaged();
778     qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
779 
780     /* all the data follows (concatinating the iov's) */
781     for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
782         /* The iov entries are partially filled */
783         size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
784         len -= towrite;
785 
786         if (!towrite) {
787             break;
788         }
789 
790         qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
791     }
792 
793     return 0;
794 }
795 
796 /* Send prior to any postcopy transfer */
797 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
798 {
799     uint64_t tmp[2];
800     tmp[0] = cpu_to_be64(getpagesize());
801     tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
802 
803     trace_qemu_savevm_send_postcopy_advise();
804     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
805 }
806 
807 /* Sent prior to starting the destination running in postcopy, discard pages
808  * that have already been sent but redirtied on the source.
809  * CMD_POSTCOPY_RAM_DISCARD consist of:
810  *      byte   version (0)
811  *      byte   Length of name field (not including 0)
812  *  n x byte   RAM block name
813  *      byte   0 terminator (just for safety)
814  *  n x        Byte ranges within the named RAMBlock
815  *      be64   Start of the range
816  *      be64   Length
817  *
818  *  name:  RAMBlock name that these entries are part of
819  *  len: Number of page entries
820  *  start_list: 'len' addresses
821  *  length_list: 'len' addresses
822  *
823  */
824 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
825                                            uint16_t len,
826                                            uint64_t *start_list,
827                                            uint64_t *length_list)
828 {
829     uint8_t *buf;
830     uint16_t tmplen;
831     uint16_t t;
832     size_t name_len = strlen(name);
833 
834     trace_qemu_savevm_send_postcopy_ram_discard(name, len);
835     assert(name_len < 256);
836     buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
837     buf[0] = postcopy_ram_discard_version;
838     buf[1] = name_len;
839     memcpy(buf + 2, name, name_len);
840     tmplen = 2 + name_len;
841     buf[tmplen++] = '\0';
842 
843     for (t = 0; t < len; t++) {
844         cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]);
845         tmplen += 8;
846         cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]);
847         tmplen += 8;
848     }
849     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
850     g_free(buf);
851 }
852 
853 /* Get the destination into a state where it can receive postcopy data. */
854 void qemu_savevm_send_postcopy_listen(QEMUFile *f)
855 {
856     trace_savevm_send_postcopy_listen();
857     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
858 }
859 
860 /* Kick the destination into running */
861 void qemu_savevm_send_postcopy_run(QEMUFile *f)
862 {
863     trace_savevm_send_postcopy_run();
864     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
865 }
866 
867 bool qemu_savevm_state_blocked(Error **errp)
868 {
869     SaveStateEntry *se;
870 
871     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
872         if (se->vmsd && se->vmsd->unmigratable) {
873             error_setg(errp, "State blocked by non-migratable device '%s'",
874                        se->idstr);
875             return true;
876         }
877     }
878     return false;
879 }
880 
881 static bool enforce_config_section(void)
882 {
883     MachineState *machine = MACHINE(qdev_get_machine());
884     return machine->enforce_config_section;
885 }
886 
887 void qemu_savevm_state_header(QEMUFile *f)
888 {
889     trace_savevm_state_header();
890     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
891     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
892 
893     if (!savevm_state.skip_configuration || enforce_config_section()) {
894         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
895         vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
896     }
897 
898 }
899 
900 void qemu_savevm_state_begin(QEMUFile *f,
901                              const MigrationParams *params)
902 {
903     SaveStateEntry *se;
904     int ret;
905 
906     trace_savevm_state_begin();
907     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
908         if (!se->ops || !se->ops->set_params) {
909             continue;
910         }
911         se->ops->set_params(params, se->opaque);
912     }
913 
914     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
915         if (!se->ops || !se->ops->save_live_setup) {
916             continue;
917         }
918         if (se->ops && se->ops->is_active) {
919             if (!se->ops->is_active(se->opaque)) {
920                 continue;
921             }
922         }
923         save_section_header(f, se, QEMU_VM_SECTION_START);
924 
925         ret = se->ops->save_live_setup(f, se->opaque);
926         save_section_footer(f, se);
927         if (ret < 0) {
928             qemu_file_set_error(f, ret);
929             break;
930         }
931     }
932 }
933 
934 /*
935  * this function has three return values:
936  *   negative: there was one error, and we have -errno.
937  *   0 : We haven't finished, caller have to go again
938  *   1 : We have finished, we can go to complete phase
939  */
940 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
941 {
942     SaveStateEntry *se;
943     int ret = 1;
944 
945     trace_savevm_state_iterate();
946     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
947         if (!se->ops || !se->ops->save_live_iterate) {
948             continue;
949         }
950         if (se->ops && se->ops->is_active) {
951             if (!se->ops->is_active(se->opaque)) {
952                 continue;
953             }
954         }
955         /*
956          * In the postcopy phase, any device that doesn't know how to
957          * do postcopy should have saved it's state in the _complete
958          * call that's already run, it might get confused if we call
959          * iterate afterwards.
960          */
961         if (postcopy && !se->ops->save_live_complete_postcopy) {
962             continue;
963         }
964         if (qemu_file_rate_limit(f)) {
965             return 0;
966         }
967         trace_savevm_section_start(se->idstr, se->section_id);
968 
969         save_section_header(f, se, QEMU_VM_SECTION_PART);
970 
971         ret = se->ops->save_live_iterate(f, se->opaque);
972         trace_savevm_section_end(se->idstr, se->section_id, ret);
973         save_section_footer(f, se);
974 
975         if (ret < 0) {
976             qemu_file_set_error(f, ret);
977         }
978         if (ret <= 0) {
979             /* Do not proceed to the next vmstate before this one reported
980                completion of the current stage. This serializes the migration
981                and reduces the probability that a faster changing state is
982                synchronized over and over again. */
983             break;
984         }
985     }
986     return ret;
987 }
988 
989 static bool should_send_vmdesc(void)
990 {
991     MachineState *machine = MACHINE(qdev_get_machine());
992     bool in_postcopy = migration_in_postcopy(migrate_get_current());
993     return !machine->suppress_vmdesc && !in_postcopy;
994 }
995 
996 /*
997  * Calls the save_live_complete_postcopy methods
998  * causing the last few pages to be sent immediately and doing any associated
999  * cleanup.
1000  * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
1001  * all the other devices, but that happens at the point we switch to postcopy.
1002  */
1003 void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1004 {
1005     SaveStateEntry *se;
1006     int ret;
1007 
1008     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1009         if (!se->ops || !se->ops->save_live_complete_postcopy) {
1010             continue;
1011         }
1012         if (se->ops && se->ops->is_active) {
1013             if (!se->ops->is_active(se->opaque)) {
1014                 continue;
1015             }
1016         }
1017         trace_savevm_section_start(se->idstr, se->section_id);
1018         /* Section type */
1019         qemu_put_byte(f, QEMU_VM_SECTION_END);
1020         qemu_put_be32(f, se->section_id);
1021 
1022         ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1023         trace_savevm_section_end(se->idstr, se->section_id, ret);
1024         save_section_footer(f, se);
1025         if (ret < 0) {
1026             qemu_file_set_error(f, ret);
1027             return;
1028         }
1029     }
1030 
1031     qemu_put_byte(f, QEMU_VM_EOF);
1032     qemu_fflush(f);
1033 }
1034 
1035 void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
1036 {
1037     QJSON *vmdesc;
1038     int vmdesc_len;
1039     SaveStateEntry *se;
1040     int ret;
1041     bool in_postcopy = migration_in_postcopy(migrate_get_current());
1042 
1043     trace_savevm_state_complete_precopy();
1044 
1045     cpu_synchronize_all_states();
1046 
1047     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1048         if (!se->ops ||
1049             (in_postcopy && se->ops->save_live_complete_postcopy) ||
1050             (in_postcopy && !iterable_only) ||
1051             !se->ops->save_live_complete_precopy) {
1052             continue;
1053         }
1054 
1055         if (se->ops && se->ops->is_active) {
1056             if (!se->ops->is_active(se->opaque)) {
1057                 continue;
1058             }
1059         }
1060         trace_savevm_section_start(se->idstr, se->section_id);
1061 
1062         save_section_header(f, se, QEMU_VM_SECTION_END);
1063 
1064         ret = se->ops->save_live_complete_precopy(f, se->opaque);
1065         trace_savevm_section_end(se->idstr, se->section_id, ret);
1066         save_section_footer(f, se);
1067         if (ret < 0) {
1068             qemu_file_set_error(f, ret);
1069             return;
1070         }
1071     }
1072 
1073     if (iterable_only) {
1074         return;
1075     }
1076 
1077     vmdesc = qjson_new();
1078     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
1079     json_start_array(vmdesc, "devices");
1080     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1081 
1082         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1083             continue;
1084         }
1085         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1086             trace_savevm_section_skip(se->idstr, se->section_id);
1087             continue;
1088         }
1089 
1090         trace_savevm_section_start(se->idstr, se->section_id);
1091 
1092         json_start_object(vmdesc, NULL);
1093         json_prop_str(vmdesc, "name", se->idstr);
1094         json_prop_int(vmdesc, "instance_id", se->instance_id);
1095 
1096         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1097         vmstate_save(f, se, vmdesc);
1098         trace_savevm_section_end(se->idstr, se->section_id, 0);
1099         save_section_footer(f, se);
1100 
1101         json_end_object(vmdesc);
1102     }
1103 
1104     if (!in_postcopy) {
1105         /* Postcopy stream will still be going */
1106         qemu_put_byte(f, QEMU_VM_EOF);
1107     }
1108 
1109     json_end_array(vmdesc);
1110     qjson_finish(vmdesc);
1111     vmdesc_len = strlen(qjson_get_str(vmdesc));
1112 
1113     if (should_send_vmdesc()) {
1114         qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1115         qemu_put_be32(f, vmdesc_len);
1116         qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1117     }
1118     object_unref(OBJECT(vmdesc));
1119 
1120     qemu_fflush(f);
1121 }
1122 
1123 /* Give an estimate of the amount left to be transferred,
1124  * the result is split into the amount for units that can and
1125  * for units that can't do postcopy.
1126  */
1127 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
1128                                uint64_t *res_non_postcopiable,
1129                                uint64_t *res_postcopiable)
1130 {
1131     SaveStateEntry *se;
1132 
1133     *res_non_postcopiable = 0;
1134     *res_postcopiable = 0;
1135 
1136 
1137     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1138         if (!se->ops || !se->ops->save_live_pending) {
1139             continue;
1140         }
1141         if (se->ops && se->ops->is_active) {
1142             if (!se->ops->is_active(se->opaque)) {
1143                 continue;
1144             }
1145         }
1146         se->ops->save_live_pending(f, se->opaque, max_size,
1147                                    res_non_postcopiable, res_postcopiable);
1148     }
1149 }
1150 
1151 void qemu_savevm_state_cleanup(void)
1152 {
1153     SaveStateEntry *se;
1154 
1155     trace_savevm_state_cleanup();
1156     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1157         if (se->ops && se->ops->cleanup) {
1158             se->ops->cleanup(se->opaque);
1159         }
1160     }
1161 }
1162 
1163 static int qemu_savevm_state(QEMUFile *f, Error **errp)
1164 {
1165     int ret;
1166     MigrationParams params = {
1167         .blk = 0,
1168         .shared = 0
1169     };
1170     MigrationState *ms = migrate_init(&params);
1171     ms->to_dst_file = f;
1172 
1173     if (qemu_savevm_state_blocked(errp)) {
1174         return -EINVAL;
1175     }
1176 
1177     qemu_mutex_unlock_iothread();
1178     qemu_savevm_state_header(f);
1179     qemu_savevm_state_begin(f, &params);
1180     qemu_mutex_lock_iothread();
1181 
1182     while (qemu_file_get_error(f) == 0) {
1183         if (qemu_savevm_state_iterate(f, false) > 0) {
1184             break;
1185         }
1186     }
1187 
1188     ret = qemu_file_get_error(f);
1189     if (ret == 0) {
1190         qemu_savevm_state_complete_precopy(f, false);
1191         ret = qemu_file_get_error(f);
1192     }
1193     qemu_savevm_state_cleanup();
1194     if (ret != 0) {
1195         error_setg_errno(errp, -ret, "Error while writing VM state");
1196     }
1197     return ret;
1198 }
1199 
1200 static int qemu_save_device_state(QEMUFile *f)
1201 {
1202     SaveStateEntry *se;
1203 
1204     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1205     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1206 
1207     cpu_synchronize_all_states();
1208 
1209     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1210         if (se->is_ram) {
1211             continue;
1212         }
1213         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1214             continue;
1215         }
1216         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1217             continue;
1218         }
1219 
1220         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1221 
1222         vmstate_save(f, se, NULL);
1223 
1224         save_section_footer(f, se);
1225     }
1226 
1227     qemu_put_byte(f, QEMU_VM_EOF);
1228 
1229     return qemu_file_get_error(f);
1230 }
1231 
1232 static SaveStateEntry *find_se(const char *idstr, int instance_id)
1233 {
1234     SaveStateEntry *se;
1235 
1236     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1237         if (!strcmp(se->idstr, idstr) &&
1238             (instance_id == se->instance_id ||
1239              instance_id == se->alias_id))
1240             return se;
1241         /* Migrating from an older version? */
1242         if (strstr(se->idstr, idstr) && se->compat) {
1243             if (!strcmp(se->compat->idstr, idstr) &&
1244                 (instance_id == se->compat->instance_id ||
1245                  instance_id == se->alias_id))
1246                 return se;
1247         }
1248     }
1249     return NULL;
1250 }
1251 
/*
 * Non-error return codes used by the loadvm command handlers.
 */
enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};
1256 
1257 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
1258 
1259 /* ------ incoming postcopy messages ------ */
1260 /* 'advise' arrives before any transfers just to tell us that a postcopy
1261  * *might* happen - it might be skipped if precopy transferred everything
1262  * quickly.
1263  */
1264 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
1265 {
1266     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1267     uint64_t remote_hps, remote_tps;
1268 
1269     trace_loadvm_postcopy_handle_advise();
1270     if (ps != POSTCOPY_INCOMING_NONE) {
1271         error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1272         return -1;
1273     }
1274 
1275     if (!postcopy_ram_supported_by_host()) {
1276         return -1;
1277     }
1278 
1279     remote_hps = qemu_get_be64(mis->from_src_file);
1280     if (remote_hps != getpagesize())  {
1281         /*
1282          * Some combinations of mismatch are probably possible but it gets
1283          * a bit more complicated.  In particular we need to place whole
1284          * host pages on the dest at once, and we need to ensure that we
1285          * handle dirtying to make sure we never end up sending part of
1286          * a hostpage on it's own.
1287          */
1288         error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
1289                      (int)remote_hps, getpagesize());
1290         return -1;
1291     }
1292 
1293     remote_tps = qemu_get_be64(mis->from_src_file);
1294     if (remote_tps != (1ul << qemu_target_page_bits())) {
1295         /*
1296          * Again, some differences could be dealt with, but for now keep it
1297          * simple.
1298          */
1299         error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
1300                      (int)remote_tps, 1 << qemu_target_page_bits());
1301         return -1;
1302     }
1303 
1304     if (ram_postcopy_incoming_init(mis)) {
1305         return -1;
1306     }
1307 
1308     postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1309 
1310     return 0;
1311 }
1312 
1313 /* After postcopy we will be told to throw some pages away since they're
1314  * dirty and will have to be demand fetched.  Must happen before CPU is
1315  * started.
1316  * There can be 0..many of these messages, each encoding multiple pages.
1317  */
1318 static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1319                                               uint16_t len)
1320 {
1321     int tmp;
1322     char ramid[256];
1323     PostcopyState ps = postcopy_state_get();
1324 
1325     trace_loadvm_postcopy_ram_handle_discard();
1326 
1327     switch (ps) {
1328     case POSTCOPY_INCOMING_ADVISE:
1329         /* 1st discard */
1330         tmp = postcopy_ram_prepare_discard(mis);
1331         if (tmp) {
1332             return tmp;
1333         }
1334         break;
1335 
1336     case POSTCOPY_INCOMING_DISCARD:
1337         /* Expected state */
1338         break;
1339 
1340     default:
1341         error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1342                      ps);
1343         return -1;
1344     }
1345     /* We're expecting a
1346      *    Version (0)
1347      *    a RAM ID string (length byte, name, 0 term)
1348      *    then at least 1 16 byte chunk
1349     */
1350     if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1351         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1352         return -1;
1353     }
1354 
1355     tmp = qemu_get_byte(mis->from_src_file);
1356     if (tmp != postcopy_ram_discard_version) {
1357         error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1358         return -1;
1359     }
1360 
1361     if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1362         error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1363         return -1;
1364     }
1365     tmp = qemu_get_byte(mis->from_src_file);
1366     if (tmp != 0) {
1367         error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1368         return -1;
1369     }
1370 
1371     len -= 3 + strlen(ramid);
1372     if (len % 16) {
1373         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1374         return -1;
1375     }
1376     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1377     while (len) {
1378         uint64_t start_addr, block_length;
1379         start_addr = qemu_get_be64(mis->from_src_file);
1380         block_length = qemu_get_be64(mis->from_src_file);
1381 
1382         len -= 16;
1383         int ret = ram_discard_range(mis, ramid, start_addr,
1384                                     block_length);
1385         if (ret) {
1386             return ret;
1387         }
1388     }
1389     trace_loadvm_postcopy_ram_handle_discard_end();
1390 
1391     return 0;
1392 }
1393 
1394 /*
1395  * Triggered by a postcopy_listen command; this thread takes over reading
1396  * the input stream, leaving the main thread free to carry on loading the rest
1397  * of the device state (from RAM).
1398  * (TODO:This could do with being in a postcopy file - but there again it's
1399  * just another input loop, not that postcopy specific)
1400  */
1401 static void *postcopy_ram_listen_thread(void *opaque)
1402 {
1403     QEMUFile *f = opaque;
1404     MigrationIncomingState *mis = migration_incoming_get_current();
1405     int load_res;
1406 
1407     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1408                                    MIGRATION_STATUS_POSTCOPY_ACTIVE);
1409     qemu_sem_post(&mis->listen_thread_sem);
1410     trace_postcopy_ram_listen_thread_start();
1411 
1412     /*
1413      * Because we're a thread and not a coroutine we can't yield
1414      * in qemu_file, and thus we must be blocking now.
1415      */
1416     qemu_file_set_blocking(f, true);
1417     load_res = qemu_loadvm_state_main(f, mis);
1418     /* And non-blocking again so we don't block in any cleanup */
1419     qemu_file_set_blocking(f, false);
1420 
1421     trace_postcopy_ram_listen_thread_exit();
1422     if (load_res < 0) {
1423         error_report("%s: loadvm failed: %d", __func__, load_res);
1424         qemu_file_set_error(f, load_res);
1425         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1426                                        MIGRATION_STATUS_FAILED);
1427     } else {
1428         /*
1429          * This looks good, but it's possible that the device loading in the
1430          * main thread hasn't finished yet, and so we might not be in 'RUN'
1431          * state yet; wait for the end of the main thread.
1432          */
1433         qemu_event_wait(&mis->main_thread_load_event);
1434     }
1435     postcopy_ram_incoming_cleanup(mis);
1436 
1437     if (load_res < 0) {
1438         /*
1439          * If something went wrong then we have a bad state so exit;
1440          * depending how far we got it might be possible at this point
1441          * to leave the guest running and fire MCEs for pages that never
1442          * arrived as a desperate recovery step.
1443          */
1444         exit(EXIT_FAILURE);
1445     }
1446 
1447     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1448                                    MIGRATION_STATUS_COMPLETED);
1449     /*
1450      * If everything has worked fine, then the main thread has waited
1451      * for us to start, and we're the last use of the mis.
1452      * (If something broke then qemu will have to exit anyway since it's
1453      * got a bad migration state).
1454      */
1455     migration_incoming_state_destroy();
1456 
1457 
1458     return NULL;
1459 }
1460 
1461 /* After this message we must be able to immediately receive postcopy data */
1462 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1463 {
1464     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1465     trace_loadvm_postcopy_handle_listen();
1466     if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1467         error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1468         return -1;
1469     }
1470     if (ps == POSTCOPY_INCOMING_ADVISE) {
1471         /*
1472          * A rare case, we entered listen without having to do any discards,
1473          * so do the setup that's normally done at the time of the 1st discard.
1474          */
1475         postcopy_ram_prepare_discard(mis);
1476     }
1477 
1478     /*
1479      * Sensitise RAM - can now generate requests for blocks that don't exist
1480      * However, at this point the CPU shouldn't be running, and the IO
1481      * shouldn't be doing anything yet so don't actually expect requests
1482      */
1483     if (postcopy_ram_enable_notify(mis)) {
1484         return -1;
1485     }
1486 
1487     if (mis->have_listen_thread) {
1488         error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1489         return -1;
1490     }
1491 
1492     mis->have_listen_thread = true;
1493     /* Start up the listening thread and wait for it to signal ready */
1494     qemu_sem_init(&mis->listen_thread_sem, 0);
1495     qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1496                        postcopy_ram_listen_thread, mis->from_src_file,
1497                        QEMU_THREAD_JOINABLE);
1498     qemu_sem_wait(&mis->listen_thread_sem);
1499     qemu_sem_destroy(&mis->listen_thread_sem);
1500 
1501     return 0;
1502 }
1503 
1504 static void loadvm_postcopy_handle_run_bh(void *opaque)
1505 {
1506     Error *local_err = NULL;
1507     MigrationIncomingState *mis = opaque;
1508 
1509     /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1510      * in migration.c
1511      */
1512     cpu_synchronize_all_post_init();
1513 
1514     qemu_announce_self();
1515 
1516     /* Make sure all file formats flush their mutable metadata */
1517     bdrv_invalidate_cache_all(&local_err);
1518     if (local_err) {
1519         error_report_err(local_err);
1520     }
1521 
1522     trace_loadvm_postcopy_handle_run_cpu_sync();
1523     cpu_synchronize_all_post_init();
1524 
1525     trace_loadvm_postcopy_handle_run_vmstart();
1526 
1527     if (autostart) {
1528         /* Hold onto your hats, starting the CPU */
1529         vm_start();
1530     } else {
1531         /* leave it paused and let management decide when to start the CPU */
1532         runstate_set(RUN_STATE_PAUSED);
1533     }
1534 
1535     qemu_bh_delete(mis->bh);
1536 }
1537 
1538 /* After all discards we can start running and asking for pages */
1539 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1540 {
1541     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1542 
1543     trace_loadvm_postcopy_handle_run();
1544     if (ps != POSTCOPY_INCOMING_LISTENING) {
1545         error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1546         return -1;
1547     }
1548 
1549     mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL);
1550     qemu_bh_schedule(mis->bh);
1551 
1552     /* We need to finish reading the stream from the package
1553      * and also stop reading anything more from the stream that loaded the
1554      * package (since it's now being read by the listener thread).
1555      * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1556      */
1557     return LOADVM_QUIT;
1558 }
1559 
1560 /**
1561  * Immediately following this command is a blob of data containing an embedded
1562  * chunk of migration stream; read it and load it.
1563  *
1564  * @mis: Incoming state
1565  * @length: Length of packaged data to read
1566  *
1567  * Returns: Negative values on error
1568  *
1569  */
1570 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1571 {
1572     int ret;
1573     uint8_t *buffer;
1574     uint32_t length;
1575     QEMUSizedBuffer *qsb;
1576 
1577     length = qemu_get_be32(mis->from_src_file);
1578     trace_loadvm_handle_cmd_packaged(length);
1579 
1580     if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1581         error_report("Unreasonably large packaged state: %u", length);
1582         return -1;
1583     }
1584     buffer = g_malloc0(length);
1585     ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
1586     if (ret != length) {
1587         g_free(buffer);
1588         error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d",
1589                      ret, length);
1590         return (ret < 0) ? ret : -EAGAIN;
1591     }
1592     trace_loadvm_handle_cmd_packaged_received(ret);
1593 
1594     /* Setup a dummy QEMUFile that actually reads from the buffer */
1595     qsb = qsb_create(buffer, length);
1596     g_free(buffer); /* Because qsb_create copies */
1597     if (!qsb) {
1598         error_report("Unable to create qsb");
1599     }
1600     QEMUFile *packf = qemu_bufopen("r", qsb);
1601 
1602     ret = qemu_loadvm_state_main(packf, mis);
1603     trace_loadvm_handle_cmd_packaged_main(ret);
1604     qemu_fclose(packf);
1605     qsb_free(qsb);
1606 
1607     return ret;
1608 }
1609 
1610 /*
1611  * Process an incoming 'QEMU_VM_COMMAND'
1612  * 0           just a normal return
1613  * LOADVM_QUIT All good, but exit the loop
1614  * <0          Error
1615  */
1616 static int loadvm_process_command(QEMUFile *f)
1617 {
1618     MigrationIncomingState *mis = migration_incoming_get_current();
1619     uint16_t cmd;
1620     uint16_t len;
1621     uint32_t tmp32;
1622 
1623     cmd = qemu_get_be16(f);
1624     len = qemu_get_be16(f);
1625 
1626     trace_loadvm_process_command(cmd, len);
1627     if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
1628         error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
1629         return -EINVAL;
1630     }
1631 
1632     if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
1633         error_report("%s received with bad length - expecting %zu, got %d",
1634                      mig_cmd_args[cmd].name,
1635                      (size_t)mig_cmd_args[cmd].len, len);
1636         return -ERANGE;
1637     }
1638 
1639     switch (cmd) {
1640     case MIG_CMD_OPEN_RETURN_PATH:
1641         if (mis->to_src_file) {
1642             error_report("CMD_OPEN_RETURN_PATH called when RP already open");
1643             /* Not really a problem, so don't give up */
1644             return 0;
1645         }
1646         mis->to_src_file = qemu_file_get_return_path(f);
1647         if (!mis->to_src_file) {
1648             error_report("CMD_OPEN_RETURN_PATH failed");
1649             return -1;
1650         }
1651         break;
1652 
1653     case MIG_CMD_PING:
1654         tmp32 = qemu_get_be32(f);
1655         trace_loadvm_process_command_ping(tmp32);
1656         if (!mis->to_src_file) {
1657             error_report("CMD_PING (0x%x) received with no return path",
1658                          tmp32);
1659             return -1;
1660         }
1661         migrate_send_rp_pong(mis, tmp32);
1662         break;
1663 
1664     case MIG_CMD_PACKAGED:
1665         return loadvm_handle_cmd_packaged(mis);
1666 
1667     case MIG_CMD_POSTCOPY_ADVISE:
1668         return loadvm_postcopy_handle_advise(mis);
1669 
1670     case MIG_CMD_POSTCOPY_LISTEN:
1671         return loadvm_postcopy_handle_listen(mis);
1672 
1673     case MIG_CMD_POSTCOPY_RUN:
1674         return loadvm_postcopy_handle_run(mis);
1675 
1676     case MIG_CMD_POSTCOPY_RAM_DISCARD:
1677         return loadvm_postcopy_ram_handle_discard(mis, len);
1678     }
1679 
1680     return 0;
1681 }
1682 
1683 struct LoadStateEntry {
1684     QLIST_ENTRY(LoadStateEntry) entry;
1685     SaveStateEntry *se;
1686     int section_id;
1687     int version_id;
1688 };
1689 
1690 /*
1691  * Read a footer off the wire and check that it matches the expected section
1692  *
1693  * Returns: true if the footer was good
1694  *          false if there is a problem (and calls error_report to say why)
1695  */
1696 static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
1697 {
1698     uint8_t read_mark;
1699     uint32_t read_section_id;
1700 
1701     if (skip_section_footers) {
1702         /* No footer to check */
1703         return true;
1704     }
1705 
1706     read_mark = qemu_get_byte(f);
1707 
1708     if (read_mark != QEMU_VM_SECTION_FOOTER) {
1709         error_report("Missing section footer for %s", le->se->idstr);
1710         return false;
1711     }
1712 
1713     read_section_id = qemu_get_be32(f);
1714     if (read_section_id != le->section_id) {
1715         error_report("Mismatched section id in footer for %s -"
1716                      " read 0x%x expected 0x%x",
1717                      le->se->idstr, read_section_id, le->section_id);
1718         return false;
1719     }
1720 
1721     /* All good */
1722     return true;
1723 }
1724 
1725 void loadvm_free_handlers(MigrationIncomingState *mis)
1726 {
1727     LoadStateEntry *le, *new_le;
1728 
1729     QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
1730         QLIST_REMOVE(le, entry);
1731         g_free(le);
1732     }
1733 }
1734 
1735 static int
1736 qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
1737 {
1738     uint32_t instance_id, version_id, section_id;
1739     SaveStateEntry *se;
1740     LoadStateEntry *le;
1741     char idstr[256];
1742     int ret;
1743 
1744     /* Read section start */
1745     section_id = qemu_get_be32(f);
1746     if (!qemu_get_counted_string(f, idstr)) {
1747         error_report("Unable to read ID string for section %u",
1748                      section_id);
1749         return -EINVAL;
1750     }
1751     instance_id = qemu_get_be32(f);
1752     version_id = qemu_get_be32(f);
1753 
1754     trace_qemu_loadvm_state_section_startfull(section_id, idstr,
1755             instance_id, version_id);
1756     /* Find savevm section */
1757     se = find_se(idstr, instance_id);
1758     if (se == NULL) {
1759         error_report("Unknown savevm section or instance '%s' %d",
1760                      idstr, instance_id);
1761         return -EINVAL;
1762     }
1763 
1764     /* Validate version */
1765     if (version_id > se->version_id) {
1766         error_report("savevm: unsupported version %d for '%s' v%d",
1767                      version_id, idstr, se->version_id);
1768         return -EINVAL;
1769     }
1770 
1771     /* Add entry */
1772     le = g_malloc0(sizeof(*le));
1773 
1774     le->se = se;
1775     le->section_id = section_id;
1776     le->version_id = version_id;
1777     QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
1778 
1779     ret = vmstate_load(f, le->se, le->version_id);
1780     if (ret < 0) {
1781         error_report("error while loading state for instance 0x%x of"
1782                      " device '%s'", instance_id, idstr);
1783         return ret;
1784     }
1785     if (!check_section_footer(f, le)) {
1786         return -EINVAL;
1787     }
1788 
1789     return 0;
1790 }
1791 
1792 static int
1793 qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
1794 {
1795     uint32_t section_id;
1796     LoadStateEntry *le;
1797     int ret;
1798 
1799     section_id = qemu_get_be32(f);
1800 
1801     trace_qemu_loadvm_state_section_partend(section_id);
1802     QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
1803         if (le->section_id == section_id) {
1804             break;
1805         }
1806     }
1807     if (le == NULL) {
1808         error_report("Unknown savevm section %d", section_id);
1809         return -EINVAL;
1810     }
1811 
1812     ret = vmstate_load(f, le->se, le->version_id);
1813     if (ret < 0) {
1814         error_report("error while loading state section id %d(%s)",
1815                      section_id, le->se->idstr);
1816         return ret;
1817     }
1818     if (!check_section_footer(f, le)) {
1819         return -EINVAL;
1820     }
1821 
1822     return 0;
1823 }
1824 
1825 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
1826 {
1827     uint8_t section_type;
1828     int ret;
1829 
1830     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
1831 
1832         trace_qemu_loadvm_state_section(section_type);
1833         switch (section_type) {
1834         case QEMU_VM_SECTION_START:
1835         case QEMU_VM_SECTION_FULL:
1836             ret = qemu_loadvm_section_start_full(f, mis);
1837             if (ret < 0) {
1838                 return ret;
1839             }
1840             break;
1841         case QEMU_VM_SECTION_PART:
1842         case QEMU_VM_SECTION_END:
1843             ret = qemu_loadvm_section_part_end(f, mis);
1844             if (ret < 0) {
1845                 return ret;
1846             }
1847             break;
1848         case QEMU_VM_COMMAND:
1849             ret = loadvm_process_command(f);
1850             trace_qemu_loadvm_state_section_command(ret);
1851             if ((ret < 0) || (ret & LOADVM_QUIT)) {
1852                 return ret;
1853             }
1854             break;
1855         default:
1856             error_report("Unknown savevm section type %d", section_type);
1857             return -EINVAL;
1858         }
1859     }
1860 
1861     return 0;
1862 }
1863 
1864 int qemu_loadvm_state(QEMUFile *f)
1865 {
1866     MigrationIncomingState *mis = migration_incoming_get_current();
1867     Error *local_err = NULL;
1868     unsigned int v;
1869     int ret;
1870 
1871     if (qemu_savevm_state_blocked(&local_err)) {
1872         error_report_err(local_err);
1873         return -EINVAL;
1874     }
1875 
1876     v = qemu_get_be32(f);
1877     if (v != QEMU_VM_FILE_MAGIC) {
1878         error_report("Not a migration stream");
1879         return -EINVAL;
1880     }
1881 
1882     v = qemu_get_be32(f);
1883     if (v == QEMU_VM_FILE_VERSION_COMPAT) {
1884         error_report("SaveVM v2 format is obsolete and don't work anymore");
1885         return -ENOTSUP;
1886     }
1887     if (v != QEMU_VM_FILE_VERSION) {
1888         error_report("Unsupported migration stream version");
1889         return -ENOTSUP;
1890     }
1891 
1892     if (!savevm_state.skip_configuration || enforce_config_section()) {
1893         if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
1894             error_report("Configuration section missing");
1895             return -EINVAL;
1896         }
1897         ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
1898 
1899         if (ret) {
1900             return ret;
1901         }
1902     }
1903 
1904     ret = qemu_loadvm_state_main(f, mis);
1905     qemu_event_set(&mis->main_thread_load_event);
1906 
1907     trace_qemu_loadvm_state_post_main(ret);
1908 
1909     if (mis->have_listen_thread) {
1910         /* Listen thread still going, can't clean up yet */
1911         return ret;
1912     }
1913 
1914     if (ret == 0) {
1915         ret = qemu_file_get_error(f);
1916     }
1917 
1918     /*
1919      * Try to read in the VMDESC section as well, so that dumping tools that
1920      * intercept our migration stream have the chance to see it.
1921      */
1922 
1923     /* We've got to be careful; if we don't read the data and just shut the fd
1924      * then the sender can error if we close while it's still sending.
1925      * We also mustn't read data that isn't there; some transports (RDMA)
1926      * will stall waiting for that data when the source has already closed.
1927      */
1928     if (ret == 0 && should_send_vmdesc()) {
1929         uint8_t *buf;
1930         uint32_t size;
1931         uint8_t  section_type = qemu_get_byte(f);
1932 
1933         if (section_type != QEMU_VM_VMDESCRIPTION) {
1934             error_report("Expected vmdescription section, but got %d",
1935                          section_type);
1936             /*
1937              * It doesn't seem worth failing at this point since
1938              * we apparently have an otherwise valid VM state
1939              */
1940         } else {
1941             buf = g_malloc(0x1000);
1942             size = qemu_get_be32(f);
1943 
1944             while (size > 0) {
1945                 uint32_t read_chunk = MIN(size, 0x1000);
1946                 qemu_get_buffer(f, buf, read_chunk);
1947                 size -= read_chunk;
1948             }
1949             g_free(buf);
1950         }
1951     }
1952 
1953     cpu_synchronize_all_post_init();
1954 
1955     return ret;
1956 }
1957 
1958 void hmp_savevm(Monitor *mon, const QDict *qdict)
1959 {
1960     BlockDriverState *bs, *bs1;
1961     QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
1962     int ret;
1963     QEMUFile *f;
1964     int saved_vm_running;
1965     uint64_t vm_state_size;
1966     qemu_timeval tv;
1967     struct tm tm;
1968     const char *name = qdict_get_try_str(qdict, "name");
1969     Error *local_err = NULL;
1970     AioContext *aio_context;
1971 
1972     if (!bdrv_all_can_snapshot(&bs)) {
1973         monitor_printf(mon, "Device '%s' is writable but does not "
1974                        "support snapshots.\n", bdrv_get_device_name(bs));
1975         return;
1976     }
1977 
1978     /* Delete old snapshots of the same name */
1979     if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
1980         error_reportf_err(local_err,
1981                           "Error while deleting snapshot on device '%s': ",
1982                           bdrv_get_device_name(bs1));
1983         return;
1984     }
1985 
1986     bs = bdrv_all_find_vmstate_bs();
1987     if (bs == NULL) {
1988         monitor_printf(mon, "No block device can accept snapshots\n");
1989         return;
1990     }
1991     aio_context = bdrv_get_aio_context(bs);
1992 
1993     saved_vm_running = runstate_is_running();
1994 
1995     ret = global_state_store();
1996     if (ret) {
1997         monitor_printf(mon, "Error saving global state\n");
1998         return;
1999     }
2000     vm_stop(RUN_STATE_SAVE_VM);
2001 
2002     aio_context_acquire(aio_context);
2003 
2004     memset(sn, 0, sizeof(*sn));
2005 
2006     /* fill auxiliary fields */
2007     qemu_gettimeofday(&tv);
2008     sn->date_sec = tv.tv_sec;
2009     sn->date_nsec = tv.tv_usec * 1000;
2010     sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
2011 
2012     if (name) {
2013         ret = bdrv_snapshot_find(bs, old_sn, name);
2014         if (ret >= 0) {
2015             pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2016             pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2017         } else {
2018             pstrcpy(sn->name, sizeof(sn->name), name);
2019         }
2020     } else {
2021         /* cast below needed for OpenBSD where tv_sec is still 'long' */
2022         localtime_r((const time_t *)&tv.tv_sec, &tm);
2023         strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
2024     }
2025 
2026     /* save the VM state */
2027     f = qemu_fopen_bdrv(bs, 1);
2028     if (!f) {
2029         monitor_printf(mon, "Could not open VM state file\n");
2030         goto the_end;
2031     }
2032     ret = qemu_savevm_state(f, &local_err);
2033     vm_state_size = qemu_ftell(f);
2034     qemu_fclose(f);
2035     if (ret < 0) {
2036         error_report_err(local_err);
2037         goto the_end;
2038     }
2039 
2040     ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2041     if (ret < 0) {
2042         monitor_printf(mon, "Error while creating snapshot on '%s'\n",
2043                        bdrv_get_device_name(bs));
2044     }
2045 
2046  the_end:
2047     aio_context_release(aio_context);
2048     if (saved_vm_running) {
2049         vm_start();
2050     }
2051 }
2052 
2053 void qmp_xen_save_devices_state(const char *filename, Error **errp)
2054 {
2055     QEMUFile *f;
2056     int saved_vm_running;
2057     int ret;
2058 
2059     saved_vm_running = runstate_is_running();
2060     vm_stop(RUN_STATE_SAVE_VM);
2061     global_state_store_running();
2062 
2063     f = qemu_fopen(filename, "wb");
2064     if (!f) {
2065         error_setg_file_open(errp, errno, filename);
2066         goto the_end;
2067     }
2068     ret = qemu_save_device_state(f);
2069     qemu_fclose(f);
2070     if (ret < 0) {
2071         error_setg(errp, QERR_IO_ERROR);
2072     }
2073 
2074  the_end:
2075     if (saved_vm_running) {
2076         vm_start();
2077     }
2078 }
2079 
2080 int load_vmstate(const char *name)
2081 {
2082     BlockDriverState *bs, *bs_vm_state;
2083     QEMUSnapshotInfo sn;
2084     QEMUFile *f;
2085     int ret;
2086     AioContext *aio_context;
2087 
2088     if (!bdrv_all_can_snapshot(&bs)) {
2089         error_report("Device '%s' is writable but does not support snapshots.",
2090                      bdrv_get_device_name(bs));
2091         return -ENOTSUP;
2092     }
2093     ret = bdrv_all_find_snapshot(name, &bs);
2094     if (ret < 0) {
2095         error_report("Device '%s' does not have the requested snapshot '%s'",
2096                      bdrv_get_device_name(bs), name);
2097         return ret;
2098     }
2099 
2100     bs_vm_state = bdrv_all_find_vmstate_bs();
2101     if (!bs_vm_state) {
2102         error_report("No block device supports snapshots");
2103         return -ENOTSUP;
2104     }
2105     aio_context = bdrv_get_aio_context(bs_vm_state);
2106 
2107     /* Don't even try to load empty VM states */
2108     aio_context_acquire(aio_context);
2109     ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2110     aio_context_release(aio_context);
2111     if (ret < 0) {
2112         return ret;
2113     } else if (sn.vm_state_size == 0) {
2114         error_report("This is a disk-only snapshot. Revert to it offline "
2115             "using qemu-img.");
2116         return -EINVAL;
2117     }
2118 
2119     /* Flush all IO requests so they don't interfere with the new state.  */
2120     bdrv_drain_all();
2121 
2122     ret = bdrv_all_goto_snapshot(name, &bs);
2123     if (ret < 0) {
2124         error_report("Error %d while activating snapshot '%s' on '%s'",
2125                      ret, name, bdrv_get_device_name(bs));
2126         return ret;
2127     }
2128 
2129     /* restore the VM state */
2130     f = qemu_fopen_bdrv(bs_vm_state, 0);
2131     if (!f) {
2132         error_report("Could not open VM state file");
2133         return -EINVAL;
2134     }
2135 
2136     qemu_system_reset(VMRESET_SILENT);
2137     migration_incoming_state_new(f);
2138 
2139     aio_context_acquire(aio_context);
2140     ret = qemu_loadvm_state(f);
2141     qemu_fclose(f);
2142     aio_context_release(aio_context);
2143 
2144     migration_incoming_state_destroy();
2145     if (ret < 0) {
2146         error_report("Error %d while loading VM state", ret);
2147         return ret;
2148     }
2149 
2150     return 0;
2151 }
2152 
2153 void hmp_delvm(Monitor *mon, const QDict *qdict)
2154 {
2155     BlockDriverState *bs;
2156     Error *err;
2157     const char *name = qdict_get_str(qdict, "name");
2158 
2159     if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
2160         error_reportf_err(err,
2161                           "Error while deleting snapshot on device '%s': ",
2162                           bdrv_get_device_name(bs));
2163     }
2164 }
2165 
2166 void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
2167 {
2168     BlockDriverState *bs, *bs1;
2169     QEMUSnapshotInfo *sn_tab, *sn;
2170     int nb_sns, i;
2171     int total;
2172     int *available_snapshots;
2173     AioContext *aio_context;
2174 
2175     bs = bdrv_all_find_vmstate_bs();
2176     if (!bs) {
2177         monitor_printf(mon, "No available block device supports snapshots\n");
2178         return;
2179     }
2180     aio_context = bdrv_get_aio_context(bs);
2181 
2182     aio_context_acquire(aio_context);
2183     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2184     aio_context_release(aio_context);
2185 
2186     if (nb_sns < 0) {
2187         monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
2188         return;
2189     }
2190 
2191     if (nb_sns == 0) {
2192         monitor_printf(mon, "There is no snapshot available.\n");
2193         return;
2194     }
2195 
2196     available_snapshots = g_new0(int, nb_sns);
2197     total = 0;
2198     for (i = 0; i < nb_sns; i++) {
2199         if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) {
2200             available_snapshots[total] = i;
2201             total++;
2202         }
2203     }
2204 
2205     if (total > 0) {
2206         bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
2207         monitor_printf(mon, "\n");
2208         for (i = 0; i < total; i++) {
2209             sn = &sn_tab[available_snapshots[i]];
2210             bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
2211             monitor_printf(mon, "\n");
2212         }
2213     } else {
2214         monitor_printf(mon, "There is no suitable snapshot available\n");
2215     }
2216 
2217     g_free(sn_tab);
2218     g_free(available_snapshots);
2219 
2220 }
2221 
2222 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2223 {
2224     qemu_ram_set_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK,
2225                        memory_region_name(mr), dev);
2226 }
2227 
2228 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2229 {
2230     qemu_ram_unset_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK);
2231 }
2232 
2233 void vmstate_register_ram_global(MemoryRegion *mr)
2234 {
2235     vmstate_register_ram(mr, NULL);
2236 }
2237