xref: /openbmc/qemu/migration/savevm.c (revision 21a24302)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 #include "config-host.h"
30 #include "qemu-common.h"
31 #include "hw/boards.h"
32 #include "hw/hw.h"
33 #include "hw/qdev.h"
34 #include "net/net.h"
35 #include "monitor/monitor.h"
36 #include "sysemu/sysemu.h"
37 #include "qemu/timer.h"
38 #include "audio/audio.h"
39 #include "migration/migration.h"
40 #include "migration/postcopy-ram.h"
41 #include "qapi/qmp/qerror.h"
42 #include "qemu/error-report.h"
43 #include "qemu/sockets.h"
44 #include "qemu/queue.h"
45 #include "sysemu/cpus.h"
46 #include "exec/memory.h"
47 #include "qmp-commands.h"
48 #include "trace.h"
49 #include "qemu/bitops.h"
50 #include "qemu/iov.h"
51 #include "block/snapshot.h"
52 #include "block/qapi.h"
53 
54 
/* Ethertype used for the RARP self-announce frame; only defined here when
 * no earlier header has already provided it. */
55 #ifndef ETH_P_RARP
56 #define ETH_P_RARP 0x8035
57 #endif
/* Values announce_self_create() writes into the RARP header:
 * hardware address space, protocol address space and opcode. */
58 #define ARP_HTYPE_ETH 0x0001
59 #define ARP_PTYPE_IP 0x0800
60 #define ARP_OP_REQUEST_REV 0x3
61 
/* Written as the first byte of every MIG_CMD_POSTCOPY_RAM_DISCARD payload. */
62 const unsigned int postcopy_ram_discard_version = 0;
63 
/* When true, save_section_footer() emits nothing; set by
 * savevm_skip_section_footers(). */
64 static bool skip_section_footers;
65 
/*
 * Per-command metadata, indexed by MIG_CMD_* value: the payload length
 * that accompanies the command (or -1 when it varies) and a printable name.
 * NOTE(review): the consumer of this table is not in this part of the file;
 * presumably the receiving side validates incoming lengths against it.
 */
66 static struct mig_cmd_args {
67     ssize_t     len; /* -1 = variable */
68     const char *name;
69 } mig_cmd_args[] = {
70     [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
71     [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
       /* payload: one be32 value, see qemu_savevm_send_ping() */
72     [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
       /* payload: two be64 values, see qemu_savevm_send_postcopy_advise() */
73     [MIG_CMD_POSTCOPY_ADVISE]  = { .len = 16, .name = "POSTCOPY_ADVISE" },
74     [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
75     [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
76     [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
77                                    .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
       /* payload: be32 length of the package that follows the command,
        * see qemu_savevm_send_packaged() */
78     [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
79     [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
80 };
81 
82 static int announce_self_create(uint8_t *buf,
83                                 uint8_t *mac_addr)
84 {
85     /* Ethernet header. */
86     memset(buf, 0xff, 6);         /* destination MAC addr */
87     memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
88     *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
89 
90     /* RARP header. */
91     *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
92     *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
93     *(buf + 18) = 6; /* hardware addr length (ethernet) */
94     *(buf + 19) = 4; /* protocol addr length (IPv4) */
95     *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
96     memcpy(buf + 22, mac_addr, 6); /* source hw addr */
97     memset(buf + 28, 0x00, 4);     /* source protocol addr */
98     memcpy(buf + 32, mac_addr, 6); /* target hw addr */
99     memset(buf + 38, 0x00, 4);     /* target protocol addr */
100 
101     /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
102     memset(buf + 42, 0x00, 18);
103 
104     return 60; /* len (FCS will be added by hardware) */
105 }
106 
107 static void qemu_announce_self_iter(NICState *nic, void *opaque)
108 {
109     uint8_t buf[60];
110     int len;
111 
112     trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
113     len = announce_self_create(buf, nic->conf->macaddr.a);
114 
115     qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
116 }
117 
118 
119 static void qemu_announce_self_once(void *opaque)
120 {
121     static int count = SELF_ANNOUNCE_ROUNDS;
122     QEMUTimer *timer = *(QEMUTimer **)opaque;
123 
124     qemu_foreach_nic(qemu_announce_self_iter, NULL);
125 
126     if (--count) {
127         /* delay 50ms, 150ms, 250ms, ... */
128         timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
129                   self_announce_delay(count));
130     } else {
131             timer_del(timer);
132             timer_free(timer);
133     }
134 }
135 
136 void qemu_announce_self(void)
137 {
138     static QEMUTimer *timer;
139     timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
140     qemu_announce_self_once(&timer);
141 }
142 
143 /***********************************************************/
144 /* savevm/loadvm support */
145 
146 static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
147                                    int64_t pos)
148 {
149     int ret;
150     QEMUIOVector qiov;
151 
152     qemu_iovec_init_external(&qiov, iov, iovcnt);
153     ret = bdrv_writev_vmstate(opaque, &qiov, pos);
154     if (ret < 0) {
155         return ret;
156     }
157 
158     return qiov.size;
159 }
160 
/*
 * QEMUFileOps.put_buffer for block-backed vmstate: write @size bytes from
 * @buf at offset @pos via bdrv_save_vmstate().
 *
 * Returns @size on success, or the negative value reported by
 * bdrv_save_vmstate() on failure, so write errors are no longer reported
 * as success (matches block_writev_buffer()).
 */
static ssize_t block_put_buffer(void *opaque, const uint8_t *buf,
                                int64_t pos, size_t size)
{
    int ret;

    ret = bdrv_save_vmstate(opaque, buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    return size;
}
167 
/*
 * QEMUFileOps.get_buffer for block-backed vmstate: read up to @size bytes
 * at offset @pos into @buf and pass through bdrv_load_vmstate()'s result.
 */
static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size)
{
    ssize_t nread = bdrv_load_vmstate(opaque, buf, pos, size);

    return nread;
}
173 
/*
 * QEMUFileOps.close for block-backed vmstate: flush the block device
 * passed as @opaque and return bdrv_flush()'s result.
 */
static int bdrv_fclose(void *opaque)
{
    int rc = bdrv_flush(opaque);

    return rc;
}
178 
/* QEMUFile callbacks used when qemu_fopen_bdrv() opens a block device for
 * reading vmstate: reads only, flush on close. */
179 static const QEMUFileOps bdrv_read_ops = {
180     .get_buffer = block_get_buffer,
181     .close =      bdrv_fclose
182 };
183 
/* QEMUFile callbacks used when qemu_fopen_bdrv() opens a block device for
 * writing vmstate: flat and vectored writes, flush on close. */
184 static const QEMUFileOps bdrv_write_ops = {
185     .put_buffer     = block_put_buffer,
186     .writev_buffer  = block_writev_buffer,
187     .close          = bdrv_fclose
188 };
189 
190 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
191 {
192     if (is_writable) {
193         return qemu_fopen_ops(bs, &bdrv_write_ops);
194     }
195     return qemu_fopen_ops(bs, &bdrv_read_ops);
196 }
197 
198 
199 /* QEMUFile timer support.
200  * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
201  */
202 
203 void timer_put(QEMUFile *f, QEMUTimer *ts)
204 {
205     uint64_t expire_time;
206 
207     expire_time = timer_expire_time_ns(ts);
208     qemu_put_be64(f, expire_time);
209 }
210 
211 void timer_get(QEMUFile *f, QEMUTimer *ts)
212 {
213     uint64_t expire_time;
214 
215     expire_time = qemu_get_be64(f);
216     if (expire_time != -1) {
217         timer_mod_ns(ts, expire_time);
218     } else {
219         timer_del(ts);
220     }
221 }
222 
223 
224 /* VMState timer support.
225  * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
226  */
227 
228 static int get_timer(QEMUFile *f, void *pv, size_t size)
229 {
230     QEMUTimer *v = pv;
231     timer_get(f, v);
232     return 0;
233 }
234 
235 static void put_timer(QEMUFile *f, void *pv, size_t size)
236 {
237     QEMUTimer *v = pv;
238     timer_put(f, v);
239 }
240 
/* VMState field type for QEMUTimer: saves/restores the expiry time through
 * put_timer()/get_timer(). */
241 const VMStateInfo vmstate_info_timer = {
242     .name = "timer",
243     .get  = get_timer,
244     .put  = put_timer,
245 };
246 
247 
/*
 * Second identity kept for entries registered with a device path: the
 * registration name without the path prefix, plus its own instance
 * numbering (see calculate_compat_instance_id()).
 */
248 typedef struct CompatEntry {
249     char idstr[256];   /* registration name, no device-path prefix */
250     int instance_id;   /* caller's id, or next free id among compat entries */
251 } CompatEntry;
252 
/* One registered save/load handler; linked into savevm_state.handlers. */
253 typedef struct SaveStateEntry {
254     QTAILQ_ENTRY(SaveStateEntry) entry;
255     char idstr[256];       /* "<device path>/<name>", or just "<name>" */
256     int instance_id;       /* distinguishes entries sharing one idstr */
257     int alias_id;          /* from vmstate_register_with_alias_id() */
258     int version_id;        /* written in the section header */
259     int section_id;        /* taken from savevm_state.global_section_id */
260     SaveVMHandlers *ops;   /* callback table; left NULL for vmsd entries */
261     const VMStateDescription *vmsd; /* NULL for old-style entries */
262     void *opaque;          /* passed back to the callbacks / vmstate code */
263     CompatEntry *compat;   /* only allocated when a device path was found */
264     int is_ram;            /* 1 when ops->save_live_setup is provided */
265 } SaveStateEntry;
266 
/* Global savevm bookkeeping plus the fields of the "configuration" section. */
267 typedef struct SaveState {
268     QTAILQ_HEAD(, SaveStateEntry) handlers; /* all registered entries */
269     int global_section_id;   /* next section_id to hand out */
270     bool skip_configuration; /* omit the configuration section when saving */
271     uint32_t len;            /* length of name, set in configuration_pre_save() */
272     const char *name;        /* machine type name carried by the section */
273 } SaveState;
274 
/* The single SaveState instance: empty handler list, section ids starting
 * at 0, configuration section enabled. */
275 static SaveState savevm_state = {
276     .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
277     .global_section_id = 0,
278     .skip_configuration = false,
279 };
280 
/* Make qemu_savevm_state_header() leave out the configuration section. */
281 void savevm_skip_configuration(void)
282 {
283     savevm_state.skip_configuration = true;
284 }
285 
286 
287 static void configuration_pre_save(void *opaque)
288 {
289     SaveState *state = opaque;
290     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
291 
292     state->len = strlen(current_name);
293     state->name = current_name;
294 }
295 
296 static int configuration_post_load(void *opaque, int version_id)
297 {
298     SaveState *state = opaque;
299     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
300 
301     if (strncmp(state->name, current_name, state->len) != 0) {
302         error_report("Machine type received is '%s' and local is '%s'",
303                      state->name, current_name);
304         return -EINVAL;
305     }
306     return 0;
307 }
308 
/*
 * Layout of the "configuration" section: a 32-bit length followed by that
 * many bytes of machine type name.  The name buffer is described with the
 * ALLOC variant of the variable-length buffer field, and the hooks fill
 * in / verify the name against the current machine.
 */
309 static const VMStateDescription vmstate_configuration = {
310     .name = "configuration",
311     .version_id = 1,
312     .post_load = configuration_post_load,
313     .pre_save = configuration_pre_save,
314     .fields = (VMStateField[]) {
315         VMSTATE_UINT32(len, SaveState),
316         VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
317         VMSTATE_END_OF_LIST()
318     },
319 };
320 
/* Forward declaration: dump_vmstate_vmsf() and dump_vmstate_vmsd() call
 * each other (a field may embed a nested description). */
321 static void dump_vmstate_vmsd(FILE *out_file,
322                               const VMStateDescription *vmsd, int indent,
323                               bool is_subsection);
324 
325 static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
326                               int indent)
327 {
328     fprintf(out_file, "%*s{\n", indent, "");
329     indent += 2;
330     fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
331     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
332             field->version_id);
333     fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
334             field->field_exists ? "true" : "false");
335     fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
336     if (field->vmsd != NULL) {
337         fprintf(out_file, ",\n");
338         dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
339     }
340     fprintf(out_file, "\n%*s}", indent - 2, "");
341 }
342 
343 static void dump_vmstate_vmss(FILE *out_file,
344                               const VMStateDescription **subsection,
345                               int indent)
346 {
347     if (*subsection != NULL) {
348         dump_vmstate_vmsd(out_file, *subsection, indent, true);
349     }
350 }
351 
352 static void dump_vmstate_vmsd(FILE *out_file,
353                               const VMStateDescription *vmsd, int indent,
354                               bool is_subsection)
355 {
356     if (is_subsection) {
357         fprintf(out_file, "%*s{\n", indent, "");
358     } else {
359         fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
360     }
361     indent += 2;
362     fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
363     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
364             vmsd->version_id);
365     fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
366             vmsd->minimum_version_id);
367     if (vmsd->fields != NULL) {
368         const VMStateField *field = vmsd->fields;
369         bool first;
370 
371         fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
372         first = true;
373         while (field->name != NULL) {
374             if (field->flags & VMS_MUST_EXIST) {
375                 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
376                 field++;
377                 continue;
378             }
379             if (!first) {
380                 fprintf(out_file, ",\n");
381             }
382             dump_vmstate_vmsf(out_file, field, indent + 2);
383             field++;
384             first = false;
385         }
386         fprintf(out_file, "\n%*s]", indent, "");
387     }
388     if (vmsd->subsections != NULL) {
389         const VMStateDescription **subsection = vmsd->subsections;
390         bool first;
391 
392         fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
393         first = true;
394         while (*subsection != NULL) {
395             if (!first) {
396                 fprintf(out_file, ",\n");
397             }
398             dump_vmstate_vmss(out_file, subsection, indent + 2);
399             subsection++;
400             first = false;
401         }
402         fprintf(out_file, "\n%*s]", indent, "");
403     }
404     fprintf(out_file, "\n%*s}", indent - 2, "");
405 }
406 
407 static void dump_machine_type(FILE *out_file)
408 {
409     MachineClass *mc;
410 
411     mc = MACHINE_GET_CLASS(current_machine);
412 
413     fprintf(out_file, "  \"vmschkmachine\": {\n");
414     fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
415     fprintf(out_file, "  },\n");
416 }
417 
418 void dump_vmstate_json_to_file(FILE *out_file)
419 {
420     GSList *list, *elt;
421     bool first;
422 
423     fprintf(out_file, "{\n");
424     dump_machine_type(out_file);
425 
426     first = true;
427     list = object_class_get_list(TYPE_DEVICE, true);
428     for (elt = list; elt; elt = elt->next) {
429         DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
430                                              TYPE_DEVICE);
431         const char *name;
432         int indent = 2;
433 
434         if (!dc->vmsd) {
435             continue;
436         }
437 
438         if (!first) {
439             fprintf(out_file, ",\n");
440         }
441         name = object_class_get_name(OBJECT_CLASS(dc));
442         fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
443         indent += 2;
444         fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
445         fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
446                 dc->vmsd->version_id);
447         fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
448                 dc->vmsd->minimum_version_id);
449 
450         dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
451 
452         fprintf(out_file, "\n%*s}", indent - 2, "");
453         first = false;
454     }
455     fprintf(out_file, "\n}\n");
456     fclose(out_file);
457 }
458 
459 static int calculate_new_instance_id(const char *idstr)
460 {
461     SaveStateEntry *se;
462     int instance_id = 0;
463 
464     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
465         if (strcmp(idstr, se->idstr) == 0
466             && instance_id <= se->instance_id) {
467             instance_id = se->instance_id + 1;
468         }
469     }
470     return instance_id;
471 }
472 
473 static int calculate_compat_instance_id(const char *idstr)
474 {
475     SaveStateEntry *se;
476     int instance_id = 0;
477 
478     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
479         if (!se->compat) {
480             continue;
481         }
482 
483         if (strcmp(idstr, se->compat->idstr) == 0
484             && instance_id <= se->compat->instance_id) {
485             instance_id = se->compat->instance_id + 1;
486         }
487     }
488     return instance_id;
489 }
490 
491 /* TODO: Individual devices generally have very little idea about the rest
492    of the system, so instance_id should be removed/replaced.
493    Meanwhile pass -1 as instance_id if you do not already have a clearly
494    distinguishing id for all instances of your device class. */
495 int register_savevm_live(DeviceState *dev,
496                          const char *idstr,
497                          int instance_id,
498                          int version_id,
499                          SaveVMHandlers *ops,
500                          void *opaque)
501 {
502     SaveStateEntry *se;
503 
504     se = g_new0(SaveStateEntry, 1);
505     se->version_id = version_id;
506     se->section_id = savevm_state.global_section_id++;
507     se->ops = ops;
508     se->opaque = opaque;
509     se->vmsd = NULL;
510     /* if this is a live_savem then set is_ram */
511     if (ops->save_live_setup != NULL) {
512         se->is_ram = 1;
513     }
514 
515     if (dev) {
516         char *id = qdev_get_dev_path(dev);
517         if (id) {
518             pstrcpy(se->idstr, sizeof(se->idstr), id);
519             pstrcat(se->idstr, sizeof(se->idstr), "/");
520             g_free(id);
521 
522             se->compat = g_new0(CompatEntry, 1);
523             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
524             se->compat->instance_id = instance_id == -1 ?
525                          calculate_compat_instance_id(idstr) : instance_id;
526             instance_id = -1;
527         }
528     }
529     pstrcat(se->idstr, sizeof(se->idstr), idstr);
530 
531     if (instance_id == -1) {
532         se->instance_id = calculate_new_instance_id(se->idstr);
533     } else {
534         se->instance_id = instance_id;
535     }
536     assert(!se->compat || se->instance_id == 0);
537     /* add at the end of list */
538     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
539     return 0;
540 }
541 
542 int register_savevm(DeviceState *dev,
543                     const char *idstr,
544                     int instance_id,
545                     int version_id,
546                     SaveStateHandler *save_state,
547                     LoadStateHandler *load_state,
548                     void *opaque)
549 {
550     SaveVMHandlers *ops = g_new0(SaveVMHandlers, 1);
551     ops->save_state = save_state;
552     ops->load_state = load_state;
553     return register_savevm_live(dev, idstr, instance_id, version_id,
554                                 ops, opaque);
555 }
556 
557 void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
558 {
559     SaveStateEntry *se, *new_se;
560     char id[256] = "";
561 
562     if (dev) {
563         char *path = qdev_get_dev_path(dev);
564         if (path) {
565             pstrcpy(id, sizeof(id), path);
566             pstrcat(id, sizeof(id), "/");
567             g_free(path);
568         }
569     }
570     pstrcat(id, sizeof(id), idstr);
571 
572     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
573         if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
574             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
575             g_free(se->compat);
576             g_free(se->ops);
577             g_free(se);
578         }
579     }
580 }
581 
582 int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
583                                    const VMStateDescription *vmsd,
584                                    void *opaque, int alias_id,
585                                    int required_for_version)
586 {
587     SaveStateEntry *se;
588 
589     /* If this triggers, alias support can be dropped for the vmsd. */
590     assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
591 
592     se = g_new0(SaveStateEntry, 1);
593     se->version_id = vmsd->version_id;
594     se->section_id = savevm_state.global_section_id++;
595     se->opaque = opaque;
596     se->vmsd = vmsd;
597     se->alias_id = alias_id;
598 
599     if (dev) {
600         char *id = qdev_get_dev_path(dev);
601         if (id) {
602             pstrcpy(se->idstr, sizeof(se->idstr), id);
603             pstrcat(se->idstr, sizeof(se->idstr), "/");
604             g_free(id);
605 
606             se->compat = g_new0(CompatEntry, 1);
607             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
608             se->compat->instance_id = instance_id == -1 ?
609                          calculate_compat_instance_id(vmsd->name) : instance_id;
610             instance_id = -1;
611         }
612     }
613     pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
614 
615     if (instance_id == -1) {
616         se->instance_id = calculate_new_instance_id(se->idstr);
617     } else {
618         se->instance_id = instance_id;
619     }
620     assert(!se->compat || se->instance_id == 0);
621     /* add at the end of list */
622     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
623     return 0;
624 }
625 
626 void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
627                         void *opaque)
628 {
629     SaveStateEntry *se, *new_se;
630 
631     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
632         if (se->vmsd == vmsd && se->opaque == opaque) {
633             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
634             g_free(se->compat);
635             g_free(se);
636         }
637     }
638 }
639 
640 static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
641 {
642     trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
643     if (!se->vmsd) {         /* Old style */
644         return se->ops->load_state(f, se->opaque, version_id);
645     }
646     return vmstate_load_state(f, se->vmsd, se->opaque, version_id);
647 }
648 
649 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
650 {
651     int64_t old_offset, size;
652 
653     old_offset = qemu_ftell_fast(f);
654     se->ops->save_state(f, se->opaque);
655     size = qemu_ftell_fast(f) - old_offset;
656 
657     if (vmdesc) {
658         json_prop_int(vmdesc, "size", size);
659         json_start_array(vmdesc, "fields");
660         json_start_object(vmdesc, NULL);
661         json_prop_str(vmdesc, "name", "data");
662         json_prop_int(vmdesc, "size", size);
663         json_prop_str(vmdesc, "type", "buffer");
664         json_end_object(vmdesc);
665         json_end_array(vmdesc);
666     }
667 }
668 
669 static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
670 {
671     trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
672     if (!se->vmsd) {
673         vmstate_save_old_style(f, se, vmdesc);
674         return;
675     }
676     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
677 }
678 
/* Turn save_section_footer() into a no-op for all later sections. */
679 void savevm_skip_section_footers(void)
680 {
681     skip_section_footers = true;
682 }
683 
684 /*
685  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
686  */
687 static void save_section_header(QEMUFile *f, SaveStateEntry *se,
688                                 uint8_t section_type)
689 {
690     qemu_put_byte(f, section_type);
691     qemu_put_be32(f, se->section_id);
692 
693     if (section_type == QEMU_VM_SECTION_FULL ||
694         section_type == QEMU_VM_SECTION_START) {
695         /* ID string */
696         size_t len = strlen(se->idstr);
697         qemu_put_byte(f, len);
698         qemu_put_buffer(f, (uint8_t *)se->idstr, len);
699 
700         qemu_put_be32(f, se->instance_id);
701         qemu_put_be32(f, se->version_id);
702     }
703 }
704 
705 /*
706  * Write a footer onto device sections that catches cases misformatted device
707  * sections.
708  */
709 static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
710 {
711     if (!skip_section_footers) {
712         qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
713         qemu_put_be32(f, se->section_id);
714     }
715 }
716 
717 /**
718  * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
719  *                           command and associated data.
720  *
721  * @f: File to send command on
722  * @command: Command type to send
723  * @len: Length of associated data
724  * @data: Data associated with command.
725  */
726 void qemu_savevm_command_send(QEMUFile *f,
727                               enum qemu_vm_cmd command,
728                               uint16_t len,
729                               uint8_t *data)
730 {
731     trace_savevm_command_send(command, len);
732     qemu_put_byte(f, QEMU_VM_COMMAND);
733     qemu_put_be16(f, (uint16_t)command);
734     qemu_put_be16(f, len);
735     qemu_put_buffer(f, data, len);
736     qemu_fflush(f);
737 }
738 
739 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
740 {
741     uint32_t buf;
742 
743     trace_savevm_send_ping(value);
744     buf = cpu_to_be32(value);
745     qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
746 }
747 
/* Send MIG_CMD_OPEN_RETURN_PATH on @f; the command has no payload. */
748 void qemu_savevm_send_open_return_path(QEMUFile *f)
749 {
750     trace_savevm_send_open_return_path();
751     qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
752 }
753 
754 /* We have a buffer of data to send; we don't want that all to be loaded
755  * by the command itself, so the command contains just the length of the
756  * extra buffer that we then send straight after it.
757  * TODO: Must be a better way to organise that
758  *
759  * Returns:
760  *    0 on success
761  *    -ve on error
762  */
763 int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
764 {
765     size_t cur_iov;
766     size_t len = qsb_get_length(qsb);
767     uint32_t tmp;
768 
769     if (len > MAX_VM_CMD_PACKAGED_SIZE) {
770         error_report("%s: Unreasonably large packaged state: %zu",
771                      __func__, len);
772         return -1;
773     }
774 
775     tmp = cpu_to_be32(len);
776 
777     trace_qemu_savevm_send_packaged();
778     qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
779 
780     /* all the data follows (concatinating the iov's) */
781     for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
782         /* The iov entries are partially filled */
783         size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
784         len -= towrite;
785 
786         if (!towrite) {
787             break;
788         }
789 
790         qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
791     }
792 
793     return 0;
794 }
795 
796 /* Send prior to any postcopy transfer */
797 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
798 {
799     uint64_t tmp[2];
800     tmp[0] = cpu_to_be64(getpagesize());
801     tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
802 
803     trace_qemu_savevm_send_postcopy_advise();
804     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
805 }
806 
807 /* Sent prior to starting the destination running in postcopy, discard pages
808  * that have already been sent but redirtied on the source.
809  * CMD_POSTCOPY_RAM_DISCARD consist of:
810  *      byte   version (0)
811  *      byte   Length of name field (not including 0)
812  *  n x byte   RAM block name
813  *      byte   0 terminator (just for safety)
814  *  n x        Byte ranges within the named RAMBlock
815  *      be64   Start of the range
816  *      be64   Length
817  *
818  *  name:  RAMBlock name that these entries are part of
819  *  len: Number of page entries
820  *  start_list: 'len' addresses
821  *  length_list: 'len' addresses
822  *
823  */
824 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
825                                            uint16_t len,
826                                            uint64_t *start_list,
827                                            uint64_t *length_list)
828 {
829     uint8_t *buf;
830     uint16_t tmplen;
831     uint16_t t;
832     size_t name_len = strlen(name);
833 
834     trace_qemu_savevm_send_postcopy_ram_discard(name, len);
835     assert(name_len < 256);
836     buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
837     buf[0] = postcopy_ram_discard_version;
838     buf[1] = name_len;
839     memcpy(buf + 2, name, name_len);
840     tmplen = 2 + name_len;
841     buf[tmplen++] = '\0';
842 
843     for (t = 0; t < len; t++) {
844         cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]);
845         tmplen += 8;
846         cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]);
847         tmplen += 8;
848     }
849     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
850     g_free(buf);
851 }
852 
853 /* Get the destination into a state where it can receive postcopy data.
     * Sends MIG_CMD_POSTCOPY_LISTEN on @f; the command has no payload. */
854 void qemu_savevm_send_postcopy_listen(QEMUFile *f)
855 {
856     trace_savevm_send_postcopy_listen();
857     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
858 }
859 
860 /* Kick the destination into running.
     * Sends MIG_CMD_POSTCOPY_RUN on @f; the command has no payload. */
861 void qemu_savevm_send_postcopy_run(QEMUFile *f)
862 {
863     trace_savevm_send_postcopy_run();
864     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
865 }
866 
867 bool qemu_savevm_state_blocked(Error **errp)
868 {
869     SaveStateEntry *se;
870 
871     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
872         if (se->vmsd && se->vmsd->unmigratable) {
873             error_setg(errp, "State blocked by non-migratable device '%s'",
874                        se->idstr);
875             return true;
876         }
877     }
878     return false;
879 }
880 
881 void qemu_savevm_state_header(QEMUFile *f)
882 {
883     trace_savevm_state_header();
884     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
885     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
886 
887     if (!savevm_state.skip_configuration) {
888         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
889         vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
890     }
891 
892 }
893 
894 void qemu_savevm_state_begin(QEMUFile *f,
895                              const MigrationParams *params)
896 {
897     SaveStateEntry *se;
898     int ret;
899 
900     trace_savevm_state_begin();
901     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
902         if (!se->ops || !se->ops->set_params) {
903             continue;
904         }
905         se->ops->set_params(params, se->opaque);
906     }
907 
908     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
909         if (!se->ops || !se->ops->save_live_setup) {
910             continue;
911         }
912         if (se->ops && se->ops->is_active) {
913             if (!se->ops->is_active(se->opaque)) {
914                 continue;
915             }
916         }
917         save_section_header(f, se, QEMU_VM_SECTION_START);
918 
919         ret = se->ops->save_live_setup(f, se->opaque);
920         save_section_footer(f, se);
921         if (ret < 0) {
922             qemu_file_set_error(f, ret);
923             break;
924         }
925     }
926 }
927 
928 /*
929  * this function has three return values:
930  *   negative: there was one error, and we have -errno.
931  *   0 : We haven't finished, caller have to go again
932  *   1 : We have finished, we can go to complete phase
933  */
934 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
935 {
936     SaveStateEntry *se;
937     int ret = 1;
938 
939     trace_savevm_state_iterate();
940     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
941         if (!se->ops || !se->ops->save_live_iterate) {
942             continue;
943         }
944         if (se->ops && se->ops->is_active) {
945             if (!se->ops->is_active(se->opaque)) {
946                 continue;
947             }
948         }
949         /*
950          * In the postcopy phase, any device that doesn't know how to
951          * do postcopy should have saved it's state in the _complete
952          * call that's already run, it might get confused if we call
953          * iterate afterwards.
954          */
955         if (postcopy && !se->ops->save_live_complete_postcopy) {
956             continue;
957         }
958         if (qemu_file_rate_limit(f)) {
959             return 0;
960         }
961         trace_savevm_section_start(se->idstr, se->section_id);
962 
963         save_section_header(f, se, QEMU_VM_SECTION_PART);
964 
965         ret = se->ops->save_live_iterate(f, se->opaque);
966         trace_savevm_section_end(se->idstr, se->section_id, ret);
967         save_section_footer(f, se);
968 
969         if (ret < 0) {
970             qemu_file_set_error(f, ret);
971         }
972         if (ret <= 0) {
973             /* Do not proceed to the next vmstate before this one reported
974                completion of the current stage. This serializes the migration
975                and reduces the probability that a faster changing state is
976                synchronized over and over again. */
977             break;
978         }
979     }
980     return ret;
981 }
982 
983 static bool should_send_vmdesc(void)
984 {
985     MachineState *machine = MACHINE(qdev_get_machine());
986     bool in_postcopy = migration_in_postcopy(migrate_get_current());
987     return !machine->suppress_vmdesc && !in_postcopy;
988 }
989 
990 /*
991  * Calls the save_live_complete_postcopy methods
992  * causing the last few pages to be sent immediately and doing any associated
993  * cleanup.
994  * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
995  * all the other devices, but that happens at the point we switch to postcopy.
996  */
997 void qemu_savevm_state_complete_postcopy(QEMUFile *f)
998 {
999     SaveStateEntry *se;
1000     int ret;
1001 
1002     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1003         if (!se->ops || !se->ops->save_live_complete_postcopy) {
1004             continue;
1005         }
1006         if (se->ops && se->ops->is_active) {
1007             if (!se->ops->is_active(se->opaque)) {
1008                 continue;
1009             }
1010         }
1011         trace_savevm_section_start(se->idstr, se->section_id);
1012         /* Section type */
1013         qemu_put_byte(f, QEMU_VM_SECTION_END);
1014         qemu_put_be32(f, se->section_id);
1015 
1016         ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1017         trace_savevm_section_end(se->idstr, se->section_id, ret);
1018         save_section_footer(f, se);
1019         if (ret < 0) {
1020             qemu_file_set_error(f, ret);
1021             return;
1022         }
1023     }
1024 
1025     qemu_put_byte(f, QEMU_VM_EOF);
1026     qemu_fflush(f);
1027 }
1028 
1029 void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
1030 {
1031     QJSON *vmdesc;
1032     int vmdesc_len;
1033     SaveStateEntry *se;
1034     int ret;
1035     bool in_postcopy = migration_in_postcopy(migrate_get_current());
1036 
1037     trace_savevm_state_complete_precopy();
1038 
1039     cpu_synchronize_all_states();
1040 
1041     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1042         if (!se->ops ||
1043             (in_postcopy && se->ops->save_live_complete_postcopy) ||
1044             (in_postcopy && !iterable_only) ||
1045             !se->ops->save_live_complete_precopy) {
1046             continue;
1047         }
1048 
1049         if (se->ops && se->ops->is_active) {
1050             if (!se->ops->is_active(se->opaque)) {
1051                 continue;
1052             }
1053         }
1054         trace_savevm_section_start(se->idstr, se->section_id);
1055 
1056         save_section_header(f, se, QEMU_VM_SECTION_END);
1057 
1058         ret = se->ops->save_live_complete_precopy(f, se->opaque);
1059         trace_savevm_section_end(se->idstr, se->section_id, ret);
1060         save_section_footer(f, se);
1061         if (ret < 0) {
1062             qemu_file_set_error(f, ret);
1063             return;
1064         }
1065     }
1066 
1067     if (iterable_only) {
1068         return;
1069     }
1070 
1071     vmdesc = qjson_new();
1072     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
1073     json_start_array(vmdesc, "devices");
1074     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1075 
1076         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1077             continue;
1078         }
1079         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1080             trace_savevm_section_skip(se->idstr, se->section_id);
1081             continue;
1082         }
1083 
1084         trace_savevm_section_start(se->idstr, se->section_id);
1085 
1086         json_start_object(vmdesc, NULL);
1087         json_prop_str(vmdesc, "name", se->idstr);
1088         json_prop_int(vmdesc, "instance_id", se->instance_id);
1089 
1090         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1091 
1092         vmstate_save(f, se, vmdesc);
1093 
1094         json_end_object(vmdesc);
1095         trace_savevm_section_end(se->idstr, se->section_id, 0);
1096         save_section_footer(f, se);
1097     }
1098 
1099     if (!in_postcopy) {
1100         /* Postcopy stream will still be going */
1101         qemu_put_byte(f, QEMU_VM_EOF);
1102     }
1103 
1104     json_end_array(vmdesc);
1105     qjson_finish(vmdesc);
1106     vmdesc_len = strlen(qjson_get_str(vmdesc));
1107 
1108     if (should_send_vmdesc()) {
1109         qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1110         qemu_put_be32(f, vmdesc_len);
1111         qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1112     }
1113     object_unref(OBJECT(vmdesc));
1114 
1115     qemu_fflush(f);
1116 }
1117 
1118 /* Give an estimate of the amount left to be transferred,
1119  * the result is split into the amount for units that can and
1120  * for units that can't do postcopy.
1121  */
1122 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
1123                                uint64_t *res_non_postcopiable,
1124                                uint64_t *res_postcopiable)
1125 {
1126     SaveStateEntry *se;
1127 
1128     *res_non_postcopiable = 0;
1129     *res_postcopiable = 0;
1130 
1131 
1132     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1133         if (!se->ops || !se->ops->save_live_pending) {
1134             continue;
1135         }
1136         if (se->ops && se->ops->is_active) {
1137             if (!se->ops->is_active(se->opaque)) {
1138                 continue;
1139             }
1140         }
1141         se->ops->save_live_pending(f, se->opaque, max_size,
1142                                    res_non_postcopiable, res_postcopiable);
1143     }
1144 }
1145 
1146 void qemu_savevm_state_cleanup(void)
1147 {
1148     SaveStateEntry *se;
1149 
1150     trace_savevm_state_cleanup();
1151     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1152         if (se->ops && se->ops->cleanup) {
1153             se->ops->cleanup(se->opaque);
1154         }
1155     }
1156 }
1157 
1158 static int qemu_savevm_state(QEMUFile *f, Error **errp)
1159 {
1160     int ret;
1161     MigrationParams params = {
1162         .blk = 0,
1163         .shared = 0
1164     };
1165     MigrationState *ms = migrate_init(&params);
1166     ms->file = f;
1167 
1168     if (qemu_savevm_state_blocked(errp)) {
1169         return -EINVAL;
1170     }
1171 
1172     qemu_mutex_unlock_iothread();
1173     qemu_savevm_state_header(f);
1174     qemu_savevm_state_begin(f, &params);
1175     qemu_mutex_lock_iothread();
1176 
1177     while (qemu_file_get_error(f) == 0) {
1178         if (qemu_savevm_state_iterate(f, false) > 0) {
1179             break;
1180         }
1181     }
1182 
1183     ret = qemu_file_get_error(f);
1184     if (ret == 0) {
1185         qemu_savevm_state_complete_precopy(f, false);
1186         ret = qemu_file_get_error(f);
1187     }
1188     qemu_savevm_state_cleanup();
1189     if (ret != 0) {
1190         error_setg_errno(errp, -ret, "Error while writing VM state");
1191     }
1192     return ret;
1193 }
1194 
1195 static int qemu_save_device_state(QEMUFile *f)
1196 {
1197     SaveStateEntry *se;
1198 
1199     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1200     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1201 
1202     cpu_synchronize_all_states();
1203 
1204     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1205         if (se->is_ram) {
1206             continue;
1207         }
1208         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1209             continue;
1210         }
1211         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1212             continue;
1213         }
1214 
1215         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1216 
1217         vmstate_save(f, se, NULL);
1218 
1219         save_section_footer(f, se);
1220     }
1221 
1222     qemu_put_byte(f, QEMU_VM_EOF);
1223 
1224     return qemu_file_get_error(f);
1225 }
1226 
1227 static SaveStateEntry *find_se(const char *idstr, int instance_id)
1228 {
1229     SaveStateEntry *se;
1230 
1231     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1232         if (!strcmp(se->idstr, idstr) &&
1233             (instance_id == se->instance_id ||
1234              instance_id == se->alias_id))
1235             return se;
1236         /* Migrating from an older version? */
1237         if (strstr(se->idstr, idstr) && se->compat) {
1238             if (!strcmp(se->compat->idstr, idstr) &&
1239                 (instance_id == se->compat->instance_id ||
1240                  instance_id == se->alias_id))
1241                 return se;
1242         }
1243     }
1244     return NULL;
1245 }
1246 
/* Positive (non-error) exit codes returned by the loadvm command handlers */
enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};
1251 
1252 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
1253 
1254 /* ------ incoming postcopy messages ------ */
1255 /* 'advise' arrives before any transfers just to tell us that a postcopy
1256  * *might* happen - it might be skipped if precopy transferred everything
1257  * quickly.
1258  */
1259 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
1260 {
1261     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1262     uint64_t remote_hps, remote_tps;
1263 
1264     trace_loadvm_postcopy_handle_advise();
1265     if (ps != POSTCOPY_INCOMING_NONE) {
1266         error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1267         return -1;
1268     }
1269 
1270     if (!postcopy_ram_supported_by_host()) {
1271         return -1;
1272     }
1273 
1274     remote_hps = qemu_get_be64(mis->from_src_file);
1275     if (remote_hps != getpagesize())  {
1276         /*
1277          * Some combinations of mismatch are probably possible but it gets
1278          * a bit more complicated.  In particular we need to place whole
1279          * host pages on the dest at once, and we need to ensure that we
1280          * handle dirtying to make sure we never end up sending part of
1281          * a hostpage on it's own.
1282          */
1283         error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
1284                      (int)remote_hps, getpagesize());
1285         return -1;
1286     }
1287 
1288     remote_tps = qemu_get_be64(mis->from_src_file);
1289     if (remote_tps != (1ul << qemu_target_page_bits())) {
1290         /*
1291          * Again, some differences could be dealt with, but for now keep it
1292          * simple.
1293          */
1294         error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
1295                      (int)remote_tps, 1 << qemu_target_page_bits());
1296         return -1;
1297     }
1298 
1299     if (ram_postcopy_incoming_init(mis)) {
1300         return -1;
1301     }
1302 
1303     postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1304 
1305     return 0;
1306 }
1307 
1308 /* After postcopy we will be told to throw some pages away since they're
1309  * dirty and will have to be demand fetched.  Must happen before CPU is
1310  * started.
1311  * There can be 0..many of these messages, each encoding multiple pages.
1312  */
1313 static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1314                                               uint16_t len)
1315 {
1316     int tmp;
1317     char ramid[256];
1318     PostcopyState ps = postcopy_state_get();
1319 
1320     trace_loadvm_postcopy_ram_handle_discard();
1321 
1322     switch (ps) {
1323     case POSTCOPY_INCOMING_ADVISE:
1324         /* 1st discard */
1325         tmp = postcopy_ram_prepare_discard(mis);
1326         if (tmp) {
1327             return tmp;
1328         }
1329         break;
1330 
1331     case POSTCOPY_INCOMING_DISCARD:
1332         /* Expected state */
1333         break;
1334 
1335     default:
1336         error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1337                      ps);
1338         return -1;
1339     }
1340     /* We're expecting a
1341      *    Version (0)
1342      *    a RAM ID string (length byte, name, 0 term)
1343      *    then at least 1 16 byte chunk
1344     */
1345     if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1346         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1347         return -1;
1348     }
1349 
1350     tmp = qemu_get_byte(mis->from_src_file);
1351     if (tmp != postcopy_ram_discard_version) {
1352         error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1353         return -1;
1354     }
1355 
1356     if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1357         error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1358         return -1;
1359     }
1360     tmp = qemu_get_byte(mis->from_src_file);
1361     if (tmp != 0) {
1362         error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1363         return -1;
1364     }
1365 
1366     len -= 3 + strlen(ramid);
1367     if (len % 16) {
1368         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1369         return -1;
1370     }
1371     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1372     while (len) {
1373         uint64_t start_addr, block_length;
1374         start_addr = qemu_get_be64(mis->from_src_file);
1375         block_length = qemu_get_be64(mis->from_src_file);
1376 
1377         len -= 16;
1378         int ret = ram_discard_range(mis, ramid, start_addr,
1379                                     block_length);
1380         if (ret) {
1381             return ret;
1382         }
1383     }
1384     trace_loadvm_postcopy_ram_handle_discard_end();
1385 
1386     return 0;
1387 }
1388 
1389 /*
1390  * Triggered by a postcopy_listen command; this thread takes over reading
1391  * the input stream, leaving the main thread free to carry on loading the rest
1392  * of the device state (from RAM).
1393  * (TODO:This could do with being in a postcopy file - but there again it's
1394  * just another input loop, not that postcopy specific)
1395  */
1396 static void *postcopy_ram_listen_thread(void *opaque)
1397 {
1398     QEMUFile *f = opaque;
1399     MigrationIncomingState *mis = migration_incoming_get_current();
1400     int load_res;
1401 
1402     qemu_sem_post(&mis->listen_thread_sem);
1403     trace_postcopy_ram_listen_thread_start();
1404 
1405     /*
1406      * Because we're a thread and not a coroutine we can't yield
1407      * in qemu_file, and thus we must be blocking now.
1408      */
1409     qemu_file_set_blocking(f, true);
1410     load_res = qemu_loadvm_state_main(f, mis);
1411     /* And non-blocking again so we don't block in any cleanup */
1412     qemu_file_set_blocking(f, false);
1413 
1414     trace_postcopy_ram_listen_thread_exit();
1415     if (load_res < 0) {
1416         error_report("%s: loadvm failed: %d", __func__, load_res);
1417         qemu_file_set_error(f, load_res);
1418     } else {
1419         /*
1420          * This looks good, but it's possible that the device loading in the
1421          * main thread hasn't finished yet, and so we might not be in 'RUN'
1422          * state yet; wait for the end of the main thread.
1423          */
1424         qemu_event_wait(&mis->main_thread_load_event);
1425     }
1426     postcopy_ram_incoming_cleanup(mis);
1427     /*
1428      * If everything has worked fine, then the main thread has waited
1429      * for us to start, and we're the last use of the mis.
1430      * (If something broke then qemu will have to exit anyway since it's
1431      * got a bad migration state).
1432      */
1433     migration_incoming_state_destroy();
1434 
1435     if (load_res < 0) {
1436         /*
1437          * If something went wrong then we have a bad state so exit;
1438          * depending how far we got it might be possible at this point
1439          * to leave the guest running and fire MCEs for pages that never
1440          * arrived as a desperate recovery step.
1441          */
1442         exit(EXIT_FAILURE);
1443     }
1444 
1445     return NULL;
1446 }
1447 
1448 /* After this message we must be able to immediately receive postcopy data */
1449 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1450 {
1451     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1452     trace_loadvm_postcopy_handle_listen();
1453     if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1454         error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1455         return -1;
1456     }
1457     if (ps == POSTCOPY_INCOMING_ADVISE) {
1458         /*
1459          * A rare case, we entered listen without having to do any discards,
1460          * so do the setup that's normally done at the time of the 1st discard.
1461          */
1462         postcopy_ram_prepare_discard(mis);
1463     }
1464 
1465     /*
1466      * Sensitise RAM - can now generate requests for blocks that don't exist
1467      * However, at this point the CPU shouldn't be running, and the IO
1468      * shouldn't be doing anything yet so don't actually expect requests
1469      */
1470     if (postcopy_ram_enable_notify(mis)) {
1471         return -1;
1472     }
1473 
1474     if (mis->have_listen_thread) {
1475         error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1476         return -1;
1477     }
1478 
1479     mis->have_listen_thread = true;
1480     /* Start up the listening thread and wait for it to signal ready */
1481     qemu_sem_init(&mis->listen_thread_sem, 0);
1482     qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1483                        postcopy_ram_listen_thread, mis->from_src_file,
1484                        QEMU_THREAD_JOINABLE);
1485     qemu_sem_wait(&mis->listen_thread_sem);
1486     qemu_sem_destroy(&mis->listen_thread_sem);
1487 
1488     return 0;
1489 }
1490 
1491 /* After all discards we can start running and asking for pages */
1492 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1493 {
1494     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1495     Error *local_err = NULL;
1496 
1497     trace_loadvm_postcopy_handle_run();
1498     if (ps != POSTCOPY_INCOMING_LISTENING) {
1499         error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1500         return -1;
1501     }
1502 
1503     /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1504      * in migration.c
1505      */
1506     cpu_synchronize_all_post_init();
1507 
1508     qemu_announce_self();
1509 
1510     /* Make sure all file formats flush their mutable metadata */
1511     bdrv_invalidate_cache_all(&local_err);
1512     if (local_err) {
1513         error_report_err(local_err);
1514         return -1;
1515     }
1516 
1517     trace_loadvm_postcopy_handle_run_cpu_sync();
1518     cpu_synchronize_all_post_init();
1519 
1520     trace_loadvm_postcopy_handle_run_vmstart();
1521 
1522     if (autostart) {
1523         /* Hold onto your hats, starting the CPU */
1524         vm_start();
1525     } else {
1526         /* leave it paused and let management decide when to start the CPU */
1527         runstate_set(RUN_STATE_PAUSED);
1528     }
1529 
1530     /* We need to finish reading the stream from the package
1531      * and also stop reading anything more from the stream that loaded the
1532      * package (since it's now being read by the listener thread).
1533      * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1534      */
1535     return LOADVM_QUIT;
1536 }
1537 
1538 /**
1539  * Immediately following this command is a blob of data containing an embedded
1540  * chunk of migration stream; read it and load it.
1541  *
1542  * @mis: Incoming state
1543  * @length: Length of packaged data to read
1544  *
1545  * Returns: Negative values on error
1546  *
1547  */
1548 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1549 {
1550     int ret;
1551     uint8_t *buffer;
1552     uint32_t length;
1553     QEMUSizedBuffer *qsb;
1554 
1555     length = qemu_get_be32(mis->from_src_file);
1556     trace_loadvm_handle_cmd_packaged(length);
1557 
1558     if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1559         error_report("Unreasonably large packaged state: %u", length);
1560         return -1;
1561     }
1562     buffer = g_malloc0(length);
1563     ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
1564     if (ret != length) {
1565         g_free(buffer);
1566         error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d\n",
1567                 ret, length);
1568         return (ret < 0) ? ret : -EAGAIN;
1569     }
1570     trace_loadvm_handle_cmd_packaged_received(ret);
1571 
1572     /* Setup a dummy QEMUFile that actually reads from the buffer */
1573     qsb = qsb_create(buffer, length);
1574     g_free(buffer); /* Because qsb_create copies */
1575     if (!qsb) {
1576         error_report("Unable to create qsb");
1577     }
1578     QEMUFile *packf = qemu_bufopen("r", qsb);
1579 
1580     ret = qemu_loadvm_state_main(packf, mis);
1581     trace_loadvm_handle_cmd_packaged_main(ret);
1582     qemu_fclose(packf);
1583     qsb_free(qsb);
1584 
1585     return ret;
1586 }
1587 
1588 /*
1589  * Process an incoming 'QEMU_VM_COMMAND'
1590  * 0           just a normal return
1591  * LOADVM_QUIT All good, but exit the loop
1592  * <0          Error
1593  */
1594 static int loadvm_process_command(QEMUFile *f)
1595 {
1596     MigrationIncomingState *mis = migration_incoming_get_current();
1597     uint16_t cmd;
1598     uint16_t len;
1599     uint32_t tmp32;
1600 
1601     cmd = qemu_get_be16(f);
1602     len = qemu_get_be16(f);
1603 
1604     trace_loadvm_process_command(cmd, len);
1605     if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
1606         error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
1607         return -EINVAL;
1608     }
1609 
1610     if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
1611         error_report("%s received with bad length - expecting %zu, got %d",
1612                      mig_cmd_args[cmd].name,
1613                      (size_t)mig_cmd_args[cmd].len, len);
1614         return -ERANGE;
1615     }
1616 
1617     switch (cmd) {
1618     case MIG_CMD_OPEN_RETURN_PATH:
1619         if (mis->to_src_file) {
1620             error_report("CMD_OPEN_RETURN_PATH called when RP already open");
1621             /* Not really a problem, so don't give up */
1622             return 0;
1623         }
1624         mis->to_src_file = qemu_file_get_return_path(f);
1625         if (!mis->to_src_file) {
1626             error_report("CMD_OPEN_RETURN_PATH failed");
1627             return -1;
1628         }
1629         break;
1630 
1631     case MIG_CMD_PING:
1632         tmp32 = qemu_get_be32(f);
1633         trace_loadvm_process_command_ping(tmp32);
1634         if (!mis->to_src_file) {
1635             error_report("CMD_PING (0x%x) received with no return path",
1636                          tmp32);
1637             return -1;
1638         }
1639         migrate_send_rp_pong(mis, tmp32);
1640         break;
1641 
1642     case MIG_CMD_PACKAGED:
1643         return loadvm_handle_cmd_packaged(mis);
1644 
1645     case MIG_CMD_POSTCOPY_ADVISE:
1646         return loadvm_postcopy_handle_advise(mis);
1647 
1648     case MIG_CMD_POSTCOPY_LISTEN:
1649         return loadvm_postcopy_handle_listen(mis);
1650 
1651     case MIG_CMD_POSTCOPY_RUN:
1652         return loadvm_postcopy_handle_run(mis);
1653 
1654     case MIG_CMD_POSTCOPY_RAM_DISCARD:
1655         return loadvm_postcopy_ram_handle_discard(mis, len);
1656     }
1657 
1658     return 0;
1659 }
1660 
1661 struct LoadStateEntry {
1662     QLIST_ENTRY(LoadStateEntry) entry;
1663     SaveStateEntry *se;
1664     int section_id;
1665     int version_id;
1666 };
1667 
1668 /*
1669  * Read a footer off the wire and check that it matches the expected section
1670  *
1671  * Returns: true if the footer was good
1672  *          false if there is a problem (and calls error_report to say why)
1673  */
1674 static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
1675 {
1676     uint8_t read_mark;
1677     uint32_t read_section_id;
1678 
1679     if (skip_section_footers) {
1680         /* No footer to check */
1681         return true;
1682     }
1683 
1684     read_mark = qemu_get_byte(f);
1685 
1686     if (read_mark != QEMU_VM_SECTION_FOOTER) {
1687         error_report("Missing section footer for %s", le->se->idstr);
1688         return false;
1689     }
1690 
1691     read_section_id = qemu_get_be32(f);
1692     if (read_section_id != le->section_id) {
1693         error_report("Mismatched section id in footer for %s -"
1694                      " read 0x%x expected 0x%x",
1695                      le->se->idstr, read_section_id, le->section_id);
1696         return false;
1697     }
1698 
1699     /* All good */
1700     return true;
1701 }
1702 
1703 void loadvm_free_handlers(MigrationIncomingState *mis)
1704 {
1705     LoadStateEntry *le, *new_le;
1706 
1707     QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
1708         QLIST_REMOVE(le, entry);
1709         g_free(le);
1710     }
1711 }
1712 
1713 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
1714 {
1715     uint8_t section_type;
1716     int ret;
1717 
1718     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
1719         uint32_t instance_id, version_id, section_id;
1720         SaveStateEntry *se;
1721         LoadStateEntry *le;
1722         char idstr[256];
1723 
1724         trace_qemu_loadvm_state_section(section_type);
1725         switch (section_type) {
1726         case QEMU_VM_SECTION_START:
1727         case QEMU_VM_SECTION_FULL:
1728             /* Read section start */
1729             section_id = qemu_get_be32(f);
1730             if (!qemu_get_counted_string(f, idstr)) {
1731                 error_report("Unable to read ID string for section %u",
1732                             section_id);
1733                 return -EINVAL;
1734             }
1735             instance_id = qemu_get_be32(f);
1736             version_id = qemu_get_be32(f);
1737 
1738             trace_qemu_loadvm_state_section_startfull(section_id, idstr,
1739                                                       instance_id, version_id);
1740             /* Find savevm section */
1741             se = find_se(idstr, instance_id);
1742             if (se == NULL) {
1743                 error_report("Unknown savevm section or instance '%s' %d",
1744                              idstr, instance_id);
1745                 return -EINVAL;
1746             }
1747 
1748             /* Validate version */
1749             if (version_id > se->version_id) {
1750                 error_report("savevm: unsupported version %d for '%s' v%d",
1751                              version_id, idstr, se->version_id);
1752                 return -EINVAL;
1753             }
1754 
1755             /* Add entry */
1756             le = g_malloc0(sizeof(*le));
1757 
1758             le->se = se;
1759             le->section_id = section_id;
1760             le->version_id = version_id;
1761             QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
1762 
1763             ret = vmstate_load(f, le->se, le->version_id);
1764             if (ret < 0) {
1765                 error_report("error while loading state for instance 0x%x of"
1766                              " device '%s'", instance_id, idstr);
1767                 return ret;
1768             }
1769             if (!check_section_footer(f, le)) {
1770                 return -EINVAL;
1771             }
1772             break;
1773         case QEMU_VM_SECTION_PART:
1774         case QEMU_VM_SECTION_END:
1775             section_id = qemu_get_be32(f);
1776 
1777             trace_qemu_loadvm_state_section_partend(section_id);
1778             QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
1779                 if (le->section_id == section_id) {
1780                     break;
1781                 }
1782             }
1783             if (le == NULL) {
1784                 error_report("Unknown savevm section %d", section_id);
1785                 return -EINVAL;
1786             }
1787 
1788             ret = vmstate_load(f, le->se, le->version_id);
1789             if (ret < 0) {
1790                 error_report("error while loading state section id %d(%s)",
1791                              section_id, le->se->idstr);
1792                 return ret;
1793             }
1794             if (!check_section_footer(f, le)) {
1795                 return -EINVAL;
1796             }
1797             break;
1798         case QEMU_VM_COMMAND:
1799             ret = loadvm_process_command(f);
1800             trace_qemu_loadvm_state_section_command(ret);
1801             if ((ret < 0) || (ret & LOADVM_QUIT)) {
1802                 return ret;
1803             }
1804             break;
1805         default:
1806             error_report("Unknown savevm section type %d", section_type);
1807             return -EINVAL;
1808         }
1809     }
1810 
1811     return 0;
1812 }
1813 
1814 int qemu_loadvm_state(QEMUFile *f)
1815 {
1816     MigrationIncomingState *mis = migration_incoming_get_current();
1817     Error *local_err = NULL;
1818     unsigned int v;
1819     int ret;
1820 
1821     if (qemu_savevm_state_blocked(&local_err)) {
1822         error_report_err(local_err);
1823         return -EINVAL;
1824     }
1825 
1826     v = qemu_get_be32(f);
1827     if (v != QEMU_VM_FILE_MAGIC) {
1828         error_report("Not a migration stream");
1829         return -EINVAL;
1830     }
1831 
1832     v = qemu_get_be32(f);
1833     if (v == QEMU_VM_FILE_VERSION_COMPAT) {
1834         error_report("SaveVM v2 format is obsolete and don't work anymore");
1835         return -ENOTSUP;
1836     }
1837     if (v != QEMU_VM_FILE_VERSION) {
1838         error_report("Unsupported migration stream version");
1839         return -ENOTSUP;
1840     }
1841 
1842     if (!savevm_state.skip_configuration) {
1843         if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
1844             error_report("Configuration section missing");
1845             return -EINVAL;
1846         }
1847         ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
1848 
1849         if (ret) {
1850             return ret;
1851         }
1852     }
1853 
1854     ret = qemu_loadvm_state_main(f, mis);
1855     qemu_event_set(&mis->main_thread_load_event);
1856 
1857     trace_qemu_loadvm_state_post_main(ret);
1858 
1859     if (mis->have_listen_thread) {
1860         /* Listen thread still going, can't clean up yet */
1861         return ret;
1862     }
1863 
1864     if (ret == 0) {
1865         ret = qemu_file_get_error(f);
1866     }
1867 
1868     /*
1869      * Try to read in the VMDESC section as well, so that dumping tools that
1870      * intercept our migration stream have the chance to see it.
1871      */
1872 
1873     /* We've got to be careful; if we don't read the data and just shut the fd
1874      * then the sender can error if we close while it's still sending.
1875      * We also mustn't read data that isn't there; some transports (RDMA)
1876      * will stall waiting for that data when the source has already closed.
1877      */
1878     if (ret == 0 && should_send_vmdesc()) {
1879         uint8_t *buf;
1880         uint32_t size;
1881         uint8_t  section_type = qemu_get_byte(f);
1882 
1883         if (section_type != QEMU_VM_VMDESCRIPTION) {
1884             error_report("Expected vmdescription section, but got %d",
1885                          section_type);
1886             /*
1887              * It doesn't seem worth failing at this point since
1888              * we apparently have an otherwise valid VM state
1889              */
1890         } else {
1891             buf = g_malloc(0x1000);
1892             size = qemu_get_be32(f);
1893 
1894             while (size > 0) {
1895                 uint32_t read_chunk = MIN(size, 0x1000);
1896                 qemu_get_buffer(f, buf, read_chunk);
1897                 size -= read_chunk;
1898             }
1899             g_free(buf);
1900         }
1901     }
1902 
1903     cpu_synchronize_all_post_init();
1904 
1905     return ret;
1906 }
1907 
1908 void hmp_savevm(Monitor *mon, const QDict *qdict)
1909 {
1910     BlockDriverState *bs, *bs1;
1911     QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
1912     int ret;
1913     QEMUFile *f;
1914     int saved_vm_running;
1915     uint64_t vm_state_size;
1916     qemu_timeval tv;
1917     struct tm tm;
1918     const char *name = qdict_get_try_str(qdict, "name");
1919     Error *local_err = NULL;
1920     AioContext *aio_context;
1921 
1922     if (!bdrv_all_can_snapshot(&bs)) {
1923         monitor_printf(mon, "Device '%s' is writable but does not "
1924                        "support snapshots.\n", bdrv_get_device_name(bs));
1925         return;
1926     }
1927 
1928     /* Delete old snapshots of the same name */
1929     if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
1930         monitor_printf(mon,
1931                        "Error while deleting snapshot on device '%s': %s\n",
1932                        bdrv_get_device_name(bs1), error_get_pretty(local_err));
1933         error_free(local_err);
1934         return;
1935     }
1936 
1937     bs = bdrv_all_find_vmstate_bs();
1938     if (bs == NULL) {
1939         monitor_printf(mon, "No block device can accept snapshots\n");
1940         return;
1941     }
1942     aio_context = bdrv_get_aio_context(bs);
1943 
1944     saved_vm_running = runstate_is_running();
1945 
1946     ret = global_state_store();
1947     if (ret) {
1948         monitor_printf(mon, "Error saving global state\n");
1949         return;
1950     }
1951     vm_stop(RUN_STATE_SAVE_VM);
1952 
1953     aio_context_acquire(aio_context);
1954 
1955     memset(sn, 0, sizeof(*sn));
1956 
1957     /* fill auxiliary fields */
1958     qemu_gettimeofday(&tv);
1959     sn->date_sec = tv.tv_sec;
1960     sn->date_nsec = tv.tv_usec * 1000;
1961     sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1962 
1963     if (name) {
1964         ret = bdrv_snapshot_find(bs, old_sn, name);
1965         if (ret >= 0) {
1966             pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
1967             pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
1968         } else {
1969             pstrcpy(sn->name, sizeof(sn->name), name);
1970         }
1971     } else {
1972         /* cast below needed for OpenBSD where tv_sec is still 'long' */
1973         localtime_r((const time_t *)&tv.tv_sec, &tm);
1974         strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
1975     }
1976 
1977     /* save the VM state */
1978     f = qemu_fopen_bdrv(bs, 1);
1979     if (!f) {
1980         monitor_printf(mon, "Could not open VM state file\n");
1981         goto the_end;
1982     }
1983     ret = qemu_savevm_state(f, &local_err);
1984     vm_state_size = qemu_ftell(f);
1985     qemu_fclose(f);
1986     if (ret < 0) {
1987         monitor_printf(mon, "%s\n", error_get_pretty(local_err));
1988         error_free(local_err);
1989         goto the_end;
1990     }
1991 
1992     ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
1993     if (ret < 0) {
1994         monitor_printf(mon, "Error while creating snapshot on '%s'\n",
1995                        bdrv_get_device_name(bs));
1996     }
1997 
1998  the_end:
1999     aio_context_release(aio_context);
2000     if (saved_vm_running) {
2001         vm_start();
2002     }
2003 }
2004 
2005 void qmp_xen_save_devices_state(const char *filename, Error **errp)
2006 {
2007     QEMUFile *f;
2008     int saved_vm_running;
2009     int ret;
2010 
2011     saved_vm_running = runstate_is_running();
2012     vm_stop(RUN_STATE_SAVE_VM);
2013     global_state_store_running();
2014 
2015     f = qemu_fopen(filename, "wb");
2016     if (!f) {
2017         error_setg_file_open(errp, errno, filename);
2018         goto the_end;
2019     }
2020     ret = qemu_save_device_state(f);
2021     qemu_fclose(f);
2022     if (ret < 0) {
2023         error_setg(errp, QERR_IO_ERROR);
2024     }
2025 
2026  the_end:
2027     if (saved_vm_running) {
2028         vm_start();
2029     }
2030 }
2031 
2032 int load_vmstate(const char *name)
2033 {
2034     BlockDriverState *bs, *bs_vm_state;
2035     QEMUSnapshotInfo sn;
2036     QEMUFile *f;
2037     int ret;
2038     AioContext *aio_context;
2039 
2040     if (!bdrv_all_can_snapshot(&bs)) {
2041         error_report("Device '%s' is writable but does not support snapshots.",
2042                      bdrv_get_device_name(bs));
2043         return -ENOTSUP;
2044     }
2045     ret = bdrv_all_find_snapshot(name, &bs);
2046     if (ret < 0) {
2047         error_report("Device '%s' does not have the requested snapshot '%s'",
2048                      bdrv_get_device_name(bs), name);
2049         return ret;
2050     }
2051 
2052     bs_vm_state = bdrv_all_find_vmstate_bs();
2053     if (!bs_vm_state) {
2054         error_report("No block device supports snapshots");
2055         return -ENOTSUP;
2056     }
2057     aio_context = bdrv_get_aio_context(bs_vm_state);
2058 
2059     /* Don't even try to load empty VM states */
2060     aio_context_acquire(aio_context);
2061     ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2062     aio_context_release(aio_context);
2063     if (ret < 0) {
2064         return ret;
2065     } else if (sn.vm_state_size == 0) {
2066         error_report("This is a disk-only snapshot. Revert to it offline "
2067             "using qemu-img.");
2068         return -EINVAL;
2069     }
2070 
2071     /* Flush all IO requests so they don't interfere with the new state.  */
2072     bdrv_drain_all();
2073 
2074     ret = bdrv_all_goto_snapshot(name, &bs);
2075     if (ret < 0) {
2076         error_report("Error %d while activating snapshot '%s' on '%s'",
2077                      ret, name, bdrv_get_device_name(bs));
2078         return ret;
2079     }
2080 
2081     /* restore the VM state */
2082     f = qemu_fopen_bdrv(bs_vm_state, 0);
2083     if (!f) {
2084         error_report("Could not open VM state file");
2085         return -EINVAL;
2086     }
2087 
2088     qemu_system_reset(VMRESET_SILENT);
2089     migration_incoming_state_new(f);
2090 
2091     aio_context_acquire(aio_context);
2092     ret = qemu_loadvm_state(f);
2093     qemu_fclose(f);
2094     aio_context_release(aio_context);
2095 
2096     migration_incoming_state_destroy();
2097     if (ret < 0) {
2098         error_report("Error %d while loading VM state", ret);
2099         return ret;
2100     }
2101 
2102     return 0;
2103 }
2104 
2105 void hmp_delvm(Monitor *mon, const QDict *qdict)
2106 {
2107     BlockDriverState *bs;
2108     Error *err;
2109     const char *name = qdict_get_str(qdict, "name");
2110 
2111     if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
2112         monitor_printf(mon,
2113                        "Error while deleting snapshot on device '%s': %s\n",
2114                        bdrv_get_device_name(bs), error_get_pretty(err));
2115         error_free(err);
2116     }
2117 }
2118 
2119 void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
2120 {
2121     BlockDriverState *bs, *bs1;
2122     QEMUSnapshotInfo *sn_tab, *sn;
2123     int nb_sns, i;
2124     int total;
2125     int *available_snapshots;
2126     AioContext *aio_context;
2127 
2128     bs = bdrv_all_find_vmstate_bs();
2129     if (!bs) {
2130         monitor_printf(mon, "No available block device supports snapshots\n");
2131         return;
2132     }
2133     aio_context = bdrv_get_aio_context(bs);
2134 
2135     aio_context_acquire(aio_context);
2136     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2137     aio_context_release(aio_context);
2138 
2139     if (nb_sns < 0) {
2140         monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
2141         return;
2142     }
2143 
2144     if (nb_sns == 0) {
2145         monitor_printf(mon, "There is no snapshot available.\n");
2146         return;
2147     }
2148 
2149     available_snapshots = g_new0(int, nb_sns);
2150     total = 0;
2151     for (i = 0; i < nb_sns; i++) {
2152         if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) {
2153             available_snapshots[total] = i;
2154             total++;
2155         }
2156     }
2157 
2158     if (total > 0) {
2159         bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
2160         monitor_printf(mon, "\n");
2161         for (i = 0; i < total; i++) {
2162             sn = &sn_tab[available_snapshots[i]];
2163             bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
2164             monitor_printf(mon, "\n");
2165         }
2166     } else {
2167         monitor_printf(mon, "There is no suitable snapshot available\n");
2168     }
2169 
2170     g_free(sn_tab);
2171     g_free(available_snapshots);
2172 
2173 }
2174 
2175 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2176 {
2177     qemu_ram_set_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK,
2178                        memory_region_name(mr), dev);
2179 }
2180 
2181 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2182 {
2183     qemu_ram_unset_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK);
2184 }
2185 
2186 void vmstate_register_ram_global(MemoryRegion *mr)
2187 {
2188     vmstate_register_ram(mr, NULL);
2189 }
2190