xref: /openbmc/qemu/migration/savevm.c (revision 63e6b5645021bb2b545a39f2896a42da5c300d9c)
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "hw/boards.h"
#include "net/net.h"
#include "migration.h"
#include "migration/snapshot.h"
#include "migration-stats.h"
#include "migration/vmstate.h"
#include "migration/misc.h"
#include "migration/register.h"
#include "migration/global_state.h"
#include "migration/channel-block.h"
#include "ram.h"
#include "qemu-file.h"
#include "savevm.h"
#include "postcopy-ram.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/job.h"
#include "qemu/main-loop.h"
#include "block/snapshot.h"
#include "qemu/cutils.h"
#include "io/channel-buffer.h"
#include "io/channel-file.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/xen.h"
#include "migration/colo.h"
#include "qemu/bitmap.h"
#include "net/announce.h"
#include "qemu/yank.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "options.h"

const unsigned int postcopy_ram_discard_version;

/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,   /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
    MIG_CMD_PING,              /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
                                      warn we might want to do postcopy */
    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
                                      pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,          /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
                                      were previously sent during
                                      precopy but are dirty. */
    MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,       /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,       /* Request the received-page bitmap on dst */
    MIG_CMD_MAX
};

#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
static struct mig_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} mig_cmd_args[] = {
    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
};

/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *   uint64_t host page size
 *   uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *   format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *   Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break the format's self-description ability. A good way is to introduce
 * a generic extensible format with an exception for the two old entities.
 */

/***********************************************************/
/* savevm/loadvm support */

static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (is_writable) {
        return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
    } else {
        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
    }
}


/* QEMUFile timer support.
 * Kept out of qemu-file.c so that qemu-file.c does not depend on qemu-timer.c.
 */

void timer_put(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = timer_expire_time_ns(ts);
    qemu_put_be64(f, expire_time);
}

void timer_get(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = qemu_get_be64(f);
    if (expire_time != -1) {
        timer_mod_ns(ts, expire_time);
    } else {
        timer_del(ts);
    }
}
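
/*
 * On the wire a timer is a single big-endian 64-bit expiry time in
 * nanoseconds; the all-ones value (uint64_t)-1 marks a timer that is not
 * pending, which is why timer_get() deletes the timer in that case.
 */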


/* VMState timer support.
 * Kept out of vmstate.c so that vmstate.c does not depend on qemu-timer.c.
 */

static int get_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field)
{
    QEMUTimer *v = pv;
    timer_get(f, v);
    return 0;
}

static int put_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field, JSONWriter *vmdesc)
{
    QEMUTimer *v = pv;
    timer_put(f, v);

    return 0;
}

const VMStateInfo vmstate_info_timer = {
    .name = "timer",
    .get  = get_timer,
    .put  = put_timer,
};
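
/*
 * Typical (illustrative) usage: a device that embeds a QEMUTimer pointer in
 * its state describes it with the VMSTATE_TIMER_PTR() macro, which routes
 * through vmstate_info_timer above.  A minimal sketch, assuming a
 * hypothetical MyDeviceState with a 'QEMUTimer *timer' member:
 *
 *     static const VMStateDescription vmstate_my_device = {
 *         .name = "my-device",
 *         .version_id = 1,
 *         .minimum_version_id = 1,
 *         .fields = (VMStateField[]) {
 *             VMSTATE_TIMER_PTR(timer, MyDeviceState),
 *             VMSTATE_END_OF_LIST()
 *         }
 *     };
 */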


typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;

typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
    uint32_t instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;
    const VMStateDescription *vmsd;
    void *opaque;
    CompatEntry *compat;
    int is_ram;
} SaveStateEntry;

typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
    int global_section_id;
    uint32_t len;
    const char *name;
    uint32_t target_page_bits;
    uint32_t caps_count;
    MigrationCapability *capabilities;
    QemuUUID uuid;
} SaveState;

static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
    .global_section_id = 0,
};

static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);

static bool should_validate_capability(int capability)
{
    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
    /* Validate only new capabilities to keep compatibility. */
    switch (capability) {
    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
        return true;
    default:
        return false;
    }
}

static uint32_t get_validatable_capabilities_count(void)
{
    MigrationState *s = migrate_get_current();
    uint32_t result = 0;
    int i;
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->capabilities[i]) {
            result++;
        }
    }
    return result;
}

static int configuration_pre_save(void *opaque)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    MigrationState *s = migrate_get_current();
    int i, j;

    state->len = strlen(current_name);
    state->name = current_name;
    state->target_page_bits = qemu_target_page_bits();

    state->caps_count = get_validatable_capabilities_count();
    state->capabilities = g_renew(MigrationCapability, state->capabilities,
                                  state->caps_count);
    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->capabilities[i]) {
            state->capabilities[j++] = i;
        }
    }
    state->uuid = qemu_uuid;

    return 0;
}

static int configuration_post_save(void *opaque)
{
    SaveState *state = opaque;

    g_free(state->capabilities);
    state->capabilities = NULL;
    state->caps_count = 0;
    return 0;
}

static int configuration_pre_load(void *opaque)
{
    SaveState *state = opaque;

    /* If there is no target-page-bits subsection it means the source
     * predates the variable-target-page-bits support and is using the
     * minimum possible value for this CPU.
     */
    state->target_page_bits = qemu_target_page_bits_min();
    return 0;
}

static bool configuration_validate_capabilities(SaveState *state)
{
    bool ret = true;
    MigrationState *s = migrate_get_current();
    unsigned long *source_caps_bm;
    int i;

    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
    for (i = 0; i < state->caps_count; i++) {
        MigrationCapability capability = state->capabilities[i];
        set_bit(capability, source_caps_bm);
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        bool source_state, target_state;
        if (!should_validate_capability(i)) {
            continue;
        }
        source_state = test_bit(i, source_caps_bm);
        target_state = s->capabilities[i];
        if (source_state != target_state) {
            error_report("Capability %s is %s, but received capability is %s",
                         MigrationCapability_str(i),
                         target_state ? "on" : "off",
                         source_state ? "on" : "off");
            ret = false;
            /* Don't break here to report all failed capabilities */
        }
    }

    g_free(source_caps_bm);
    return ret;
}

static int configuration_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    int ret = 0;

    if (strncmp(state->name, current_name, state->len) != 0) {
        error_report("Machine type received is '%.*s' and local is '%s'",
                     (int) state->len, state->name, current_name);
        ret = -EINVAL;
        goto out;
    }

    if (state->target_page_bits != qemu_target_page_bits()) {
        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
                     state->target_page_bits, qemu_target_page_bits());
        ret = -EINVAL;
        goto out;
    }

    if (!configuration_validate_capabilities(state)) {
        ret = -EINVAL;
        goto out;
    }

out:
    g_free((void *)state->name);
    state->name = NULL;
    state->len = 0;
    g_free(state->capabilities);
    state->capabilities = NULL;
    state->caps_count = 0;

    return ret;
}

static int get_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    MigrationCapability *capability = pv;
    char capability_str[UINT8_MAX + 1];
    uint8_t len;
    int i;

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)capability_str, len);
    capability_str[len] = '\0';
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (!strcmp(MigrationCapability_str(i), capability_str)) {
            *capability = i;
            return 0;
        }
    }
    error_report("Received unknown capability %s", capability_str);
    return -EINVAL;
}

static int put_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, JSONWriter *vmdesc)
{
    MigrationCapability *capability = pv;
    const char *capability_str = MigrationCapability_str(*capability);
    size_t len = strlen(capability_str);
    assert(len <= UINT8_MAX);

    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)capability_str, len);
    return 0;
}

static const VMStateInfo vmstate_info_capability = {
    .name = "capability",
    .get  = get_capability,
    .put  = put_capability,
};
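
/*
 * On the wire each capability is a one-byte length followed by the name
 * with no NUL terminator; for example the "events" capability is encoded
 * as the byte 0x06 followed by the six ASCII bytes 'e' 'v' 'e' 'n' 't' 's'.
 */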

/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (i.e. the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    return qemu_target_page_bits()
        > qemu_target_page_bits_min();
}

static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() > 0;
}

static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_uuid_needed(void *opaque)
{
    return qemu_uuid_set && migrate_validate_uuid();
}

static int vmstate_uuid_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    char uuid_src[UUID_STR_LEN];
    char uuid_dst[UUID_STR_LEN];

    if (!qemu_uuid_set) {
        /*
         * Only a warning, because the user might not know the UUID in
         * some cases, e.g. when loading an old snapshot.
         */
        qemu_uuid_unparse(&state->uuid, uuid_src);
        warn_report("Received UUID %s, but local UUID isn't set",
                     uuid_src);
        return 0;
    }
    if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
        qemu_uuid_unparse(&state->uuid, uuid_src);
        qemu_uuid_unparse(&qemu_uuid, uuid_dst);
        error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_uuid = {
    .name = "configuration/uuid",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_uuid_needed,
    .post_load = vmstate_uuid_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .post_save = configuration_post_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        &vmstate_uuid,
        NULL
    }
};

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);

static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
                              int indent)
{
    fprintf(out_file, "%*s{\n", indent, "");
    indent += 2;
    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            field->version_id);
    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
            field->field_exists ? "true" : "false");
    if (field->flags & VMS_ARRAY) {
        fprintf(out_file, "%*s\"num\": %d,\n", indent, "", field->num);
    }
    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
    if (field->vmsd != NULL) {
        fprintf(out_file, ",\n");
        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_vmstate_vmss(FILE *out_file,
                              const VMStateDescription *subsection,
                              int indent)
{
    if (subsection != NULL) {
        dump_vmstate_vmsd(out_file, subsection, indent, true);
    }
}

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection)
{
    if (is_subsection) {
        fprintf(out_file, "%*s{\n", indent, "");
    } else {
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
    }
    indent += 2;
    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            vmsd->version_id);
    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
            vmsd->minimum_version_id);
    if (vmsd->fields != NULL) {
        const VMStateField *field = vmsd->fields;
        bool first;

        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
        first = true;
        while (field->name != NULL) {
            if (field->flags & VMS_MUST_EXIST) {
                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
                field++;
                continue;
            }
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmsf(out_file, field, indent + 2);
            field++;
            first = false;
        }
        assert(field->flags == VMS_END);
        fprintf(out_file, "\n%*s]", indent, "");
    }
    if (vmsd->subsections != NULL) {
        const VMStateDescription * const *subsection = vmsd->subsections;
        bool first;

        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
        first = true;
        while (*subsection != NULL) {
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmss(out_file, *subsection, indent + 2);
            subsection++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_machine_type(FILE *out_file)
{
    MachineClass *mc;

    mc = MACHINE_GET_CLASS(current_machine);

    fprintf(out_file, "  \"vmschkmachine\": {\n");
    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
    fprintf(out_file, "  },\n");
}

void dump_vmstate_json_to_file(FILE *out_file)
{
    GSList *list, *elt;
    bool first;

    fprintf(out_file, "{\n");
    dump_machine_type(out_file);

    first = true;
    list = object_class_get_list(TYPE_DEVICE, true);
    for (elt = list; elt; elt = elt->next) {
        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
                                             TYPE_DEVICE);
        const char *name;
        int indent = 2;

        if (!dc->vmsd) {
            continue;
        }

        if (!first) {
            fprintf(out_file, ",\n");
        }
        name = object_class_get_name(OBJECT_CLASS(dc));
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
        indent += 2;
        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
                dc->vmsd->version_id);
        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
                dc->vmsd->minimum_version_id);

        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);

        fprintf(out_file, "\n%*s}", indent - 2, "");
        first = false;
    }
    fprintf(out_file, "\n}\n");
    fclose(out_file);
    g_slist_free(list);
}
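
/*
 * Illustrative shape of the emitted JSON, assuming a hypothetical
 * "my-device" class and pieced together from the fprintf calls above:
 *
 *     {
 *       "vmschkmachine": {
 *         "Name": "<machine type name>"
 *       },
 *       "my-device": {
 *         "Name": "my-device",
 *         "version_id": 1,
 *         "minimum_version_id": 1,
 *         "Description": {
 *           "name": "my-device",
 *           "version_id": 1,
 *           "minimum_version_id": 1,
 *           "Fields": [ ... ]
 *         }
 *       }
 *     }
 */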

static uint32_t calculate_new_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    uint32_t instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (strcmp(idstr, se->idstr) == 0
            && instance_id <= se->instance_id) {
            instance_id = se->instance_id + 1;
        }
    }
    /* Make sure we never wrap around without it being noticed */
    assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
    return instance_id;
}
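
/*
 * Example (hypothetical idstr): if two entries named "my-device" are
 * already registered with instance_ids 0 and 1, the next registration of
 * "my-device" gets instance_id 2, while an unrelated idstr starts at 0.
 */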

static int calculate_compat_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    int instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->compat) {
            continue;
        }

        if (strcmp(idstr, se->compat->idstr) == 0
            && instance_id <= se->compat->instance_id) {
            instance_id = se->compat->instance_id + 1;
        }
    }
    return instance_id;
}

static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
    if (se->vmsd) {
        return se->vmsd->priority;
    }
    return MIG_PRI_DEFAULT;
}

static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;
    int i;

    assert(priority <= MIG_PRI_MAX);

    /*
     * This should never happen; otherwise migration will probably fail
     * silently somewhere, because we could be wrongly applying one
     * object's properties to another one.  Bail out ASAP.
     */
    if (find_se(nse->idstr, nse->instance_id)) {
        error_report("%s: Detected duplicate SaveStateEntry: "
                     "id=%s, instance_id=0x%"PRIx32, __func__,
                     nse->idstr, nse->instance_id);
        exit(EXIT_FAILURE);
    }

    for (i = priority - 1; i >= 0; i--) {
        se = savevm_state.handler_pri_head[i];
        if (se != NULL) {
            assert(save_state_priority(se) < priority);
            break;
        }
    }

    if (i >= 0) {
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }

    if (savevm_state.handler_pri_head[priority] == NULL) {
        savevm_state.handler_pri_head[priority] = nse;
    }
}
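
/*
 * The handlers list is kept sorted by descending priority, and
 * handler_pri_head[] caches the first entry of each priority bucket, so an
 * insertion scans at most MIG_PRI_MAX bucket heads instead of the whole
 * list: the new entry goes right before the head of the next lower
 * non-empty bucket, or at the tail if no lower-priority entry exists.
 */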

static void savevm_state_handler_remove(SaveStateEntry *se)
{
    SaveStateEntry *next;
    MigrationPriority priority = save_state_priority(se);

    if (se == savevm_state.handler_pri_head[priority]) {
        next = QTAILQ_NEXT(se, entry);
        if (next != NULL && save_state_priority(next) == priority) {
            savevm_state.handler_pri_head[priority] = next;
        } else {
            savevm_state.handler_pri_head[priority] = NULL;
        }
    }
    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}

/* TODO: Individual devices generally have very little idea about the rest
   of the system, so instance_id should be removed/replaced.
   Meanwhile pass VMSTATE_INSTANCE_ID_ANY as instance_id if you do not
   already have a clearly distinguishing id for all instances of your
   device class. */
int register_savevm_live(const char *idstr,
                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
{
    SaveStateEntry *se;

    se = g_new0(SaveStateEntry, 1);
    se->version_id = version_id;
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;
    se->opaque = opaque;
    se->vmsd = NULL;
    /* if this is a live_savevm handler then set is_ram */
    if (ops->save_setup != NULL) {
        se->is_ram = 1;
    }

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
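
/*
 * A minimal registration sketch (the callbacks are hypothetical; the
 * member names are real SaveVMHandlers fields used elsewhere in this
 * file):
 *
 *     static SaveVMHandlers my_handlers = {
 *         .save_setup = my_save_setup,
 *         .save_live_iterate = my_save_live_iterate,
 *         .save_live_complete_precopy = my_save_complete_precopy,
 *         .load_state = my_load_state,
 *     };
 *
 *     register_savevm_live("my-live-device", VMSTATE_INSTANCE_ID_ANY, 1,
 *                          &my_handlers, &my_device_state);
 */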

void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
{
    SaveStateEntry *se, *new_se;
    char id[256] = "";

    if (obj) {
        char *oid = vmstate_if_get_id(obj);
        if (oid) {
            pstrcpy(id, sizeof(id), oid);
            pstrcat(id, sizeof(id), "/");
            g_free(oid);
        }
    }
    pstrcat(id, sizeof(id), idstr);

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}

/*
 * Perform some basic checks on vmsds at registration
 * time.
 */
static void vmstate_check(const VMStateDescription *vmsd)
{
    const VMStateField *field = vmsd->fields;
    const VMStateDescription * const *subsection = vmsd->subsections;

    if (field) {
        while (field->name) {
            if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
                /* Recurse to sub structures */
                vmstate_check(field->vmsd);
            }
            /* Carry on */
            field++;
        }
        /* Check for the end of field list canary */
        if (field->flags != VMS_END) {
            error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
            g_assert_not_reached();
        }
    }

    while (subsection && *subsection) {
        /*
         * The name of a subsection should start with the name of the
         * current object.
         */
        assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
        vmstate_check(*subsection);
        subsection++;
    }
}

/*
 * See comment in hw/intc/xics.c:icp_realize()
 *
 * This function can be removed when
 * pre_2_10_vmstate_register_dummy_icp() is removed.
 */
int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
                                 const VMStateDescription *vmsd,
                                 void *opaque)
{
    SaveStateEntry *se = find_se(vmsd->name, instance_id);

    if (se) {
        savevm_state_handler_remove(se);
    }
    return vmstate_register(obj, instance_id, vmsd, opaque);
}

int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
                                   Error **errp)
{
    SaveStateEntry *se;

    /* If this triggers, alias support can be dropped for the vmsd. */
    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);

    se = g_new0(SaveStateEntry, 1);
    se->version_id = vmsd->version_id;
    se->section_id = savevm_state.global_section_id++;
    se->opaque = opaque;
    se->vmsd = vmsd;
    se->alias_id = alias_id;

    if (obj) {
        char *id = vmstate_if_get_id(obj);
        if (id) {
            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
                sizeof(se->idstr)) {
                error_setg(errp, "Path too long for VMState (%s)", id);
                g_free(id);
                g_free(se);

                return -1;
            }
            g_free(id);

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
            se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
                         calculate_compat_instance_id(vmsd->name) : instance_id;
            instance_id = VMSTATE_INSTANCE_ID_ANY;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }

    /* Perform a recursive sanity check during the test runs */
    if (qtest_enabled()) {
        vmstate_check(vmsd);
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}

void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *se, *new_se;

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}

static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
{
    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {         /* Old style */
        return se->ops->load_state(f, se->opaque, se->load_version_id);
    }
    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
}

static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   JSONWriter *vmdesc)
{
    uint64_t old_offset = qemu_file_transferred(f);
    se->ops->save_state(f, se->opaque);
    uint64_t size = qemu_file_transferred(f) - old_offset;

    if (vmdesc) {
        json_writer_int64(vmdesc, "size", size);
        json_writer_start_array(vmdesc, "fields");
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", "data");
        json_writer_int64(vmdesc, "size", size);
        json_writer_str(vmdesc, "type", "buffer");
        json_writer_end_object(vmdesc);
        json_writer_end_array(vmdesc);
    }
}
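
/*
 * For an old-style section the vmdesc fragment written above looks like
 * this (the size is the byte delta of the stream position):
 *
 *     "size": <n>, "fields": [
 *         { "name": "data", "size": <n>, "type": "buffer" }
 *     ]
 */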

/*
 * Write the header for a device section (QEMU_VM_SECTION START/END/PART/FULL)
 */
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        /* ID string */
        size_t len = strlen(se->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);

        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}
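
/*
 * On the wire a FULL/START section header is therefore:
 *
 *     byte   section type
 *     be32   section id
 *     byte   idstr length
 *     n x    idstr bytes (no NUL terminator)
 *     be32   instance id
 *     be32   version id
 *
 * PART/END headers carry only the section type and section id.
 */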

/*
 * Write a footer onto device sections; it catches cases of misformatted
 * device sections.
 */
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    if (migrate_get_current()->send_section_footer) {
        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
        qemu_put_be32(f, se->section_id);
    }
}

static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
{
    int ret;
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();

    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
        return 0;
    }
    if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
        trace_savevm_section_skip(se->idstr, se->section_id);
        return 0;
    }

    trace_savevm_section_start(se->idstr, se->section_id);
    save_section_header(f, se, QEMU_VM_SECTION_FULL);
    if (vmdesc) {
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", se->idstr);
        json_writer_int64(vmdesc, "instance_id", se->instance_id);
    }

    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {
        vmstate_save_old_style(f, se, vmdesc);
    } else {
        ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc,
                                          &local_err);
        if (ret) {
            migrate_set_error(s, local_err);
            error_report_err(local_err);
            return ret;
        }
    }

    trace_savevm_section_end(se->idstr, se->section_id, 0);
    save_section_footer(f, se);
    if (vmdesc) {
        json_writer_end_object(vmdesc);
    }
    return 0;
}

/**
 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 *                           command and associated data.
 *
 * @f: File to send command on
 * @command: Command type to send
 * @len: Length of associated data
 * @data: Data associated with command.
 */
static void qemu_savevm_command_send(QEMUFile *f,
                                     enum qemu_vm_cmd command,
                                     uint16_t len,
                                     uint8_t *data)
{
    trace_savevm_command_send(command, len);
    qemu_put_byte(f, QEMU_VM_COMMAND);
    qemu_put_be16(f, (uint16_t)command);
    qemu_put_be16(f, len);
    qemu_put_buffer(f, data, len);
    qemu_fflush(f);
}
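
/*
 * Wire layout of a command element, as emitted above:
 *
 *     byte   QEMU_VM_COMMAND
 *     be16   command number (enum qemu_vm_cmd)
 *     be16   payload length
 *     n x    payload bytes
 *
 * For example MIG_CMD_PING carries a 4-byte big-endian value; see
 * qemu_savevm_send_ping() below.
 */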

void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();
    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}

void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
    uint32_t buf;

    trace_savevm_send_ping(value);
    buf = cpu_to_be32(value);
    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
}

void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();
    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}

/* We have a buffer of data to send; we don't want all of it to be loaded
 * by the command itself, so the command contains just the length of the
 * extra buffer that we then send straight after it.
 * TODO: There must be a better way to organise this.
 *
 * Returns:
 *    0 on success
 *    -ve on error
 */
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
    uint32_t tmp;
    MigrationState *ms = migrate_get_current();
    Error *local_err = NULL;

    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
        error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
                     __func__, len);
        migrate_set_error(ms, local_err);
        error_report_err(local_err);
        return -1;
    }

    tmp = cpu_to_be32(len);

    trace_qemu_savevm_send_packaged();
    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);

    qemu_put_buffer(f, buf, len);

    return 0;
}

/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
    if (migrate_postcopy_ram()) {
        uint64_t tmp[2];
        tmp[0] = cpu_to_be64(ram_pagesize_summary());
        tmp[1] = cpu_to_be64(qemu_target_page_size());

        trace_qemu_savevm_send_postcopy_advise();
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
                                 16, (uint8_t *)tmp);
    } else {
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
    }
}

/* Sent prior to starting the destination running in postcopy; tells it to
 * discard pages that have already been sent but were redirtied on the
 * source.
 * CMD_POSTCOPY_RAM_DISCARD consists of:
 *      byte   version (0)
 *      byte   Length of name field (not including 0)
 *  n x byte   RAM block name
 *      byte   0 terminator (just for safety)
 *  n x        Byte ranges within the named RAMBlock
 *      be64   Start of the range
 *      be64   Length
 *
 *  name:  RAMBlock name that these entries are part of
 *  len: Number of page entries
 *  start_list: 'len' addresses
 *  length_list: 'len' addresses
 *
 */
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                           uint16_t len,
                                           uint64_t *start_list,
                                           uint64_t *length_list)
{
    uint8_t *buf;
    uint16_t tmplen;
    uint16_t t;
    size_t name_len = strlen(name);

    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
    assert(name_len < 256);
    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
    buf[0] = postcopy_ram_discard_version;
    buf[1] = name_len;
    memcpy(buf + 2, name, name_len);
    tmplen = 2 + name_len;
    buf[tmplen++] = '\0';

    for (t = 0; t < len; t++) {
        stq_be_p(buf + tmplen, start_list[t]);
        tmplen += 8;
        stq_be_p(buf + tmplen, length_list[t]);
        tmplen += 8;
    }
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
    g_free(buf);
}

/* Get the destination into a state where it can receive postcopy data. */
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
{
    trace_savevm_send_postcopy_listen();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
}

/* Kick the destination into running */
void qemu_savevm_send_postcopy_run(QEMUFile *f)
{
    trace_savevm_send_postcopy_run();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
}

void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}

void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
{
    size_t len;
    char buf[256];

    trace_savevm_send_recv_bitmap(block_name);

    buf[0] = len = strlen(block_name);
    memcpy(buf + 1, block_name, len);

    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
}

bool qemu_savevm_state_blocked(Error **errp)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            error_setg(errp, "State blocked by non-migratable device '%s'",
                       se->idstr);
            return true;
        }
    }
    return false;
}

void qemu_savevm_non_migratable_list(strList **reasons)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            QAPI_LIST_PREPEND(*reasons,
                              g_strdup_printf("non-migratable device: %s",
                                              se->idstr));
        }
    }
}

void qemu_savevm_state_header(QEMUFile *f)
{
    MigrationState *s = migrate_get_current();

    s->vmdesc = json_writer_new(false);

    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (s->send_configuration) {
        qemu_put_byte(f, QEMU_VM_CONFIGURATION);

        /*
         * This starts the main json object and is paired with the
         * json_writer_end_object in
         * qemu_savevm_state_complete_precopy_non_iterable
         */
        json_writer_start_object(s->vmdesc, NULL);

        json_writer_start_object(s->vmdesc, "configuration");
        vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
        json_writer_end_object(s->vmdesc);
    }
}

bool qemu_savevm_state_guest_unplug_pending(void)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->dev_unplug_pending &&
            se->vmsd->dev_unplug_pending(se->opaque)) {
            return true;
        }
    }

    return false;
}

int qemu_savevm_state_prepare(Error **errp)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_prepare) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        ret = se->ops->save_prepare(se->opaque, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

void qemu_savevm_state_setup(QEMUFile *f)
{
    MigrationState *ms = migrate_get_current();
    SaveStateEntry *se;
    Error *local_err = NULL;
    int ret;

    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
    json_writer_start_array(ms->vmdesc, "devices");

    trace_savevm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            ret = vmstate_save(f, se, ms->vmdesc);
            if (ret) {
                qemu_file_set_error(f, ret);
                break;
            }
            continue;
        }

        if (!se->ops || !se->ops->save_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        save_section_header(f, se, QEMU_VM_SECTION_START);

        ret = se->ops->save_setup(f, se->opaque);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            break;
        }
    }

    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
        error_report_err(local_err);
    }
}

int qemu_savevm_state_resume_prepare(MigrationState *s)
{
    SaveStateEntry *se;
    int ret;

    trace_savevm_state_resume_prepare();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->resume_prepare) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        ret = se->ops->resume_prepare(s, se->opaque);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * This function has three return values:
 *   negative: there was an error; the value is -errno.
 *   0 : we haven't finished; the caller has to call us again.
 *   1 : we have finished; we can move on to the completion phase.
 */
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *se;
    int ret = 1;

    trace_savevm_state_iterate();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_iterate) {
            continue;
        }
        if (se->ops->is_active &&
            !se->ops->is_active(se->opaque)) {
            continue;
        }
        if (se->ops->is_active_iterate &&
            !se->ops->is_active_iterate(se->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, any device that doesn't know how to
         * do postcopy should have saved its state in the _complete
         * call that has already run; it might get confused if we call
         * iterate afterwards.
         */
        if (postcopy &&
            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
            continue;
        }
        if (migration_rate_exceeded(f)) {
            return 0;
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_PART);

        ret = se->ops->save_live_iterate(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);

        if (ret < 0) {
            error_report("failed to save SaveStateEntry with id(name): "
                         "%d(%s): %d",
                         se->section_id, se->idstr, ret);
            qemu_file_set_error(f, ret);
        }
        if (ret <= 0) {
            /* Do not proceed to the next vmstate before this one has
               reported completion of the current stage. This serializes
               the migration and reduces the probability that a
               faster-changing state is synchronized over and over again. */
            break;
        }
    }
    return ret;
}

static bool should_send_vmdesc(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    bool in_postcopy = migration_in_postcopy();
    return !machine->suppress_vmdesc && !in_postcopy;
}

/*
 * Calls the save_live_complete_postcopy methods, causing the last few
 * pages to be sent immediately and doing any associated cleanup.
 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
 * all the other devices, but that happens at the point we switch to postcopy.
 */
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_complete_postcopy) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);
        /* Section type */
        qemu_put_byte(f, QEMU_VM_SECTION_END);
        qemu_put_be32(f, se->section_id);

        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);
}

static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
    int64_t start_ts_each, end_ts_each;
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops ||
            (in_postcopy && se->ops->has_postcopy &&
             se->ops->has_postcopy(se->opaque)) ||
            !se->ops->save_live_complete_precopy) {
            continue;
        }

        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_END);

        ret = se->ops->save_live_complete_precopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return -1;
        }
        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
                                    end_ts_each - start_ts_each);
    }

    trace_vmstate_downtime_checkpoint("src-iterable-saved");

    return 0;
}

int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    MigrationState *ms = migrate_get_current();
    int64_t start_ts_each, end_ts_each;
    JSONWriter *vmdesc = ms->vmdesc;
    int vmdesc_len;
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            /* Already saved during qemu_savevm_state_setup(). */
            continue;
        }

        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

        ret = vmstate_save(f, se, vmdesc);
        if (ret) {
            qemu_file_set_error(f, ret);
            return ret;
        }

        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
                                    end_ts_each - start_ts_each);
    }

    if (inactivate_disks) {
        /* Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_activate_all() on the other end won't fail. */
        ret = bdrv_inactivate_all();
        if (ret) {
            Error *local_err = NULL;
            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
                       __func__, ret);
            migrate_set_error(ms, local_err);
            error_report_err(local_err);
            qemu_file_set_error(f, ret);
            return ret;
        }
    }
    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    json_writer_end_array(vmdesc);
    json_writer_end_object(vmdesc);
    vmdesc_len = strlen(json_writer_get(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
    }

    /* Free it now to detect any inconsistencies. */
    json_writer_free(vmdesc);
    ms->vmdesc = NULL;

    trace_vmstate_downtime_checkpoint("src-non-iterable-saved");

    return 0;
}
1601  
1602  int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
1603                                         bool inactivate_disks)
1604  {
1605      int ret;
1606      Error *local_err = NULL;
1607      bool in_postcopy = migration_in_postcopy();
1608  
1609      if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
1610          error_report_err(local_err);
1611      }
1612  
1613      trace_savevm_state_complete_precopy();
1614  
1615      cpu_synchronize_all_states();
1616  
1617      if (!in_postcopy || iterable_only) {
1618          ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
1619          if (ret) {
1620              return ret;
1621          }
1622      }
1623  
1624      if (iterable_only) {
1625          goto flush;
1626      }
1627  
1628      ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
1629                                                            inactivate_disks);
1630      if (ret) {
1631          return ret;
1632      }
1633  
1634  flush:
1635      return qemu_fflush(f);
1636  }
1637  
1638  /* Give an estimate of the amount of data left to be transferred;
1639   * the result is split between units that can and units that
1640   * cannot do postcopy.
1641   */
1642  void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
1643                                          uint64_t *can_postcopy)
1644  {
1645      SaveStateEntry *se;
1646  
1647      *must_precopy = 0;
1648      *can_postcopy = 0;
1649  
1650      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1651          if (!se->ops || !se->ops->state_pending_estimate) {
1652              continue;
1653          }
1654          if (se->ops->is_active) {
1655              if (!se->ops->is_active(se->opaque)) {
1656                  continue;
1657              }
1658          }
1659          se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
1660      }
1661  }
1662  
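      /*
       * Like qemu_savevm_state_pending_estimate(), but asks each handler for
       * an exact figure via state_pending_exact; presumably more expensive
       * to compute, so callers should prefer the estimate when it suffices.
       */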
1663  void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
1664                                       uint64_t *can_postcopy)
1665  {
1666      SaveStateEntry *se;
1667  
1668      *must_precopy = 0;
1669      *can_postcopy = 0;
1670  
1671      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1672          if (!se->ops || !se->ops->state_pending_exact) {
1673              continue;
1674          }
1675          if (se->ops->is_active) {
1676              if (!se->ops->is_active(se->opaque)) {
1677                  continue;
1678              }
1679          }
1680          se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
1681      }
1682  }
1683  
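      /*
       * Notify precopy consumers that we're cleaning up, then give every
       * registered handler a chance to run its save_cleanup hook.
       */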
1684  void qemu_savevm_state_cleanup(void)
1685  {
1686      SaveStateEntry *se;
1687      Error *local_err = NULL;
1688  
1689      if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
1690          error_report_err(local_err);
1691      }
1692  
1693      trace_savevm_state_cleanup();
1694      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1695          if (se->ops && se->ops->save_cleanup) {
1696              se->ops->save_cleanup(se->opaque);
1697          }
1698      }
1699  }
1700  
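      /*
       * Save a complete VM state into @f in one go: write the stream header,
       * run the setup phase, iterate until every handler is done, then write
       * the final device state.  Refuses to run while a migration is active;
       * used by the snapshot code below.  Returns 0 or a negative errno,
       * setting @errp on failure.
       */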
1701  static int qemu_savevm_state(QEMUFile *f, Error **errp)
1702  {
1703      int ret;
1704      MigrationState *ms = migrate_get_current();
1705      MigrationStatus status;
1706  
1707      if (migration_is_running(ms->state)) {
1708          error_setg(errp, QERR_MIGRATION_ACTIVE);
1709          return -EINVAL;
1710      }
1711  
1712      if (migrate_block()) {
1713          error_setg(errp, "Block migration and snapshots are incompatible");
1714          return -EINVAL;
1715      }
1716  
1717      ret = migrate_init(ms, errp);
1718      if (ret) {
1719          return ret;
1720      }
1721      ms->to_dst_file = f;
1722  
1723      qemu_savevm_state_header(f);
1724      qemu_savevm_state_setup(f);
1725  
1726      while (qemu_file_get_error(f) == 0) {
1727          if (qemu_savevm_state_iterate(f, false) > 0) {
1728              break;
1729          }
1730      }
1731  
1732      ret = qemu_file_get_error(f);
1733      if (ret == 0) {
1734          qemu_savevm_state_complete_precopy(f, false, false);
1735          ret = qemu_file_get_error(f);
1736      }
1737      qemu_savevm_state_cleanup();
1738      if (ret != 0) {
1739          error_setg_errno(errp, -ret, "Error while writing VM state");
1740      }
1741  
1742      if (ret != 0) {
1743          status = MIGRATION_STATUS_FAILED;
1744      } else {
1745          status = MIGRATION_STATUS_COMPLETED;
1746      }
1747      migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
1748  
1749      /* f is an outer parameter; it must not stay in the global migration
1750       * state after this function finishes */
1751      ms->to_dst_file = NULL;
1752  
1753      return ret;
1754  }
1755  
1756  void qemu_savevm_live_state(QEMUFile *f)
1757  {
1758      /* save QEMU_VM_SECTION_END section */
1759      qemu_savevm_state_complete_precopy(f, true, false);
1760      qemu_put_byte(f, QEMU_VM_EOF);
1761  }
1762  
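      /*
       * Save only the non-RAM device state.  Used by the Xen
       * "xen-save-devices-state" command below and by COLO; in the COLO
       * case the file magic/version header is skipped, presumably because
       * the checkpoint stream is already established.
       */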
1763  int qemu_save_device_state(QEMUFile *f)
1764  {
1765      SaveStateEntry *se;
1766  
1767      if (!migration_in_colo_state()) {
1768          qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1769          qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1770      }
1771      cpu_synchronize_all_states();
1772  
1773      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1774          int ret;
1775  
1776          if (se->is_ram) {
1777              continue;
1778          }
1779          ret = vmstate_save(f, se, NULL);
1780          if (ret) {
1781              return ret;
1782          }
1783      }
1784  
1785      qemu_put_byte(f, QEMU_VM_EOF);
1786  
1787      return qemu_file_get_error(f);
1788  }
1789  
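      /*
       * Look up a SaveStateEntry by ID string and instance (or alias) id,
       * falling back to the legacy "compat" name so streams from older QEMU
       * versions can still be matched.
       */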
1790  static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
1791  {
1792      SaveStateEntry *se;
1793  
1794      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1795          if (!strcmp(se->idstr, idstr) &&
1796              (instance_id == se->instance_id ||
1797               instance_id == se->alias_id))
1798              return se;
1799          /* Migrating from an older version? */
1800          if (strstr(se->idstr, idstr) && se->compat) {
1801              if (!strcmp(se->compat->idstr, idstr) &&
1802                  (instance_id == se->compat->instance_id ||
1803                   instance_id == se->alias_id))
1804                  return se;
1805          }
1806      }
1807      return NULL;
1808  }
1809  
1810  enum LoadVMExitCodes {
1811      /* Allow a command to quit all layers of nested loadvm loops */
1812      LOADVM_QUIT     =  1,
1813  };
1814  
1815  /* ------ incoming postcopy messages ------ */
1816  /* 'advise' arrives before any transfers just to tell us that a postcopy
1817   * *might* happen - it might be skipped if precopy transferred everything
1818   * quickly.
1819   */
1820  static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
1821                                           uint16_t len)
1822  {
1823      PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1824      uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
1825      size_t page_size = qemu_target_page_size();
1826      Error *local_err = NULL;
1827  
1828      trace_loadvm_postcopy_handle_advise();
1829      if (ps != POSTCOPY_INCOMING_NONE) {
1830          error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1831          return -1;
1832      }
1833  
1834      switch (len) {
1835      case 0:
1836          if (migrate_postcopy_ram()) {
1837              error_report("RAM postcopy is enabled but have 0 byte advise");
1838              return -EINVAL;
1839          }
1840          return 0;
1841      case 8 + 8:
1842          if (!migrate_postcopy_ram()) {
1843              error_report("RAM postcopy is disabled but have 16 byte advise");
1844              return -EINVAL;
1845          }
1846          break;
1847      default:
1848          error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
1849          return -EINVAL;
1850      }
1851  
1852      if (!postcopy_ram_supported_by_host(mis, &local_err)) {
1853          error_report_err(local_err);
1854          postcopy_state_set(POSTCOPY_INCOMING_NONE);
1855          return -1;
1856      }
1857  
1858      remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
1859      local_pagesize_summary = ram_pagesize_summary();
1860  
1861      if (remote_pagesize_summary != local_pagesize_summary)  {
1862          /*
1863           * This detects two potential causes of mismatch:
1864           *   a) A mismatch in host page sizes
1865           *      Some combinations of mismatch are probably possible but it gets
1866           *      a bit more complicated.  In particular we need to place whole
1867           *      host pages on the dest at once, and we need to ensure that we
1868           *      handle dirtying to make sure we never end up sending part of
1869           *      a hostpage on its own.
1870           *   b) The use of different huge page sizes on source/destination;
1871           *      a finer grained test is performed during RAM block migration,
1872           *      but this test here causes a nice early clear failure, and
1873           *      also fails when passed to an older qemu that doesn't
1874           *      do huge pages.
1875           */
1876          error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
1877                                                               " d=%" PRIx64 ")",
1878                       remote_pagesize_summary, local_pagesize_summary);
1879          return -1;
1880      }
1881  
1882      remote_tps = qemu_get_be64(mis->from_src_file);
1883      if (remote_tps != page_size) {
1884          /*
1885           * Again, some differences could be dealt with, but for now keep it
1886           * simple.
1887           */
1888          error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
1889                       (int)remote_tps, page_size);
1890          return -1;
1891      }
1892  
1893      if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
1894          error_report_err(local_err);
1895          return -1;
1896      }
1897  
1898      if (ram_postcopy_incoming_init(mis)) {
1899          return -1;
1900      }
1901  
1902      return 0;
1903  }
1904  
1905  /* After postcopy we will be told to throw some pages away since they're
1906   * dirty and will have to be demand fetched.  Must happen before the
1907   * CPU is started.
1908   * There can be 0..many of these messages, each encoding multiple pages.
1909   */
1910  static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1911                                                uint16_t len)
1912  {
1913      int tmp;
1914      char ramid[256];
1915      PostcopyState ps = postcopy_state_get();
1916  
1917      trace_loadvm_postcopy_ram_handle_discard();
1918  
1919      switch (ps) {
1920      case POSTCOPY_INCOMING_ADVISE:
1921          /* 1st discard */
1922          tmp = postcopy_ram_prepare_discard(mis);
1923          if (tmp) {
1924              return tmp;
1925          }
1926          break;
1927  
1928      case POSTCOPY_INCOMING_DISCARD:
1929          /* Expected state */
1930          break;
1931  
1932      default:
1933          error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1934                       ps);
1935          return -1;
1936      }
1937      /* We're expecting:
1938       *    a version byte (0)
1939       *    a RAM ID string (length byte, name, 0 terminator)
1940       *    then at least one 16 byte chunk of (start, length) be64 pairs
1941       */
1942      if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1943          error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1944          return -1;
1945      }
1946  
1947      tmp = qemu_get_byte(mis->from_src_file);
1948      if (tmp != postcopy_ram_discard_version) {
1949          error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1950          return -1;
1951      }
1952  
1953      if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1954          error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1955          return -1;
1956      }
1957      tmp = qemu_get_byte(mis->from_src_file);
1958      if (tmp != 0) {
1959          error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1960          return -1;
1961      }
1962  
1963      len -= 3 + strlen(ramid);
1964      if (len % 16) {
1965          error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1966          return -1;
1967      }
1968      trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1969      while (len) {
1970          uint64_t start_addr, block_length;
1971          start_addr = qemu_get_be64(mis->from_src_file);
1972          block_length = qemu_get_be64(mis->from_src_file);
1973  
1974          len -= 16;
1975          int ret = ram_discard_range(ramid, start_addr, block_length);
1976          if (ret) {
1977              return ret;
1978          }
1979      }
1980      trace_loadvm_postcopy_ram_handle_discard_end();
1981  
1982      return 0;
1983  }
1984  
1985  /*
1986   * Triggered by a postcopy_listen command; this thread takes over reading
1987   * the input stream, leaving the main thread free to carry on loading the rest
1988   * of the device state (from RAM).
1989   * (TODO: This could do with being in a postcopy file - but then again it's
1990   * just another input loop, not that postcopy specific)
1991   */
1992  static void *postcopy_ram_listen_thread(void *opaque)
1993  {
1994      MigrationIncomingState *mis = migration_incoming_get_current();
1995      QEMUFile *f = mis->from_src_file;
1996      int load_res;
1997      MigrationState *migr = migrate_get_current();
1998  
1999      object_ref(OBJECT(migr));
2000  
2001      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
2002                                     MIGRATION_STATUS_POSTCOPY_ACTIVE);
2003      qemu_sem_post(&mis->thread_sync_sem);
2004      trace_postcopy_ram_listen_thread_start();
2005  
2006      rcu_register_thread();
2007      /*
2008       * Because we're a thread and not a coroutine we can't yield
2009       * in qemu_file, and thus we must be blocking now.
2010       */
2011      qemu_file_set_blocking(f, true);
2012      load_res = qemu_loadvm_state_main(f, mis);
2013  
2014      /*
2015       * This is tricky, but mis->from_src_file can change after the call
2016       * above returns, if postcopy recovery happened. In the future, we may
2017       * want a wrapper for the QEMUFile handle.
2018       */
2019      f = mis->from_src_file;
2020  
2021      /* And non-blocking again so we don't block in any cleanup */
2022      qemu_file_set_blocking(f, false);
2023  
2024      trace_postcopy_ram_listen_thread_exit();
2025      if (load_res < 0) {
2026          qemu_file_set_error(f, load_res);
2027          dirty_bitmap_mig_cancel_incoming();
2028          if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2029              !migrate_postcopy_ram() && migrate_dirty_bitmaps())
2030          {
2031              error_report("%s: loadvm failed during postcopy: %d. All states "
2032                           "are migrated except dirty bitmaps. Some dirty "
2033                           "bitmaps may be lost, and present migrated dirty "
2034                           "bitmaps are correctly migrated and valid.",
2035                           __func__, load_res);
2036              load_res = 0; /* prevent further exit() */
2037          } else {
2038              error_report("%s: loadvm failed: %d", __func__, load_res);
2039              migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2040                                             MIGRATION_STATUS_FAILED);
2041          }
2042      }
2043      if (load_res >= 0) {
2044          /*
2045           * This looks good, but it's possible that the device loading in the
2046           * main thread hasn't finished yet, and so we might not be in 'RUN'
2047           * state yet; wait for the end of the main thread.
2048           */
2049          qemu_event_wait(&mis->main_thread_load_event);
2050      }
2051      postcopy_ram_incoming_cleanup(mis);
2052  
2053      if (load_res < 0) {
2054          /*
2055           * If something went wrong then we have a bad state so exit;
2056           * depending on how far we got it might be possible at this point
2057           * to leave the guest running and fire MCEs for pages that never
2058           * arrived as a desperate recovery step.
2059           */
2060          rcu_unregister_thread();
2061          exit(EXIT_FAILURE);
2062      }
2063  
2064      migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2065                                     MIGRATION_STATUS_COMPLETED);
2066      /*
2067       * If everything has worked fine, then the main thread has waited
2068       * for us to start, and we're the last use of the mis.
2069       * (If something broke then qemu will have to exit anyway since it's
2070       * got a bad migration state).
2071       */
2072      migration_incoming_state_destroy();
2073      qemu_loadvm_state_cleanup();
2074  
2075      rcu_unregister_thread();
2076      mis->have_listen_thread = false;
2077      postcopy_state_set(POSTCOPY_INCOMING_END);
2078  
2079      object_unref(OBJECT(migr));
2080  
2081      return NULL;
2082  }
2083  
2084  /* After this message we must be able to immediately receive postcopy data */
2085  static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
2086  {
2087      PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
2088      Error *local_err = NULL;
2089  
2090      trace_loadvm_postcopy_handle_listen("enter");
2091  
2092      if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
2093          error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
2094          return -1;
2095      }
2096      if (ps == POSTCOPY_INCOMING_ADVISE) {
2097          /*
2098           * A rare case: we entered listen without having to do any discards,
2099           * so do the setup that's normally done at the time of the 1st discard.
2100           */
2101          if (migrate_postcopy_ram()) {
2102              postcopy_ram_prepare_discard(mis);
2103          }
2104      }
2105  
2106      trace_loadvm_postcopy_handle_listen("after discard");
2107  
2108      /*
2109       * Sensitise RAM - can now generate requests for blocks that don't exist.
2110       * However, at this point the CPU shouldn't be running, and the IO
2111       * shouldn't be doing anything yet, so don't actually expect requests.
2112       */
2113      if (migrate_postcopy_ram()) {
2114          if (postcopy_ram_incoming_setup(mis)) {
2115              postcopy_ram_incoming_cleanup(mis);
2116              return -1;
2117          }
2118      }
2119  
2120      trace_loadvm_postcopy_handle_listen("after uffd");
2121  
2122      if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
2123          error_report_err(local_err);
2124          return -1;
2125      }
2126  
2127      mis->have_listen_thread = true;
2128      postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
2129                             postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
2130      trace_loadvm_postcopy_handle_listen("return");
2131  
2132      return 0;
2133  }
2134  
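      /*
       * Bottom half scheduled by loadvm_postcopy_handle_run(): runs in the
       * main thread to sync CPU state, announce ourselves on the network,
       * re-activate the block images and finally start (or leave paused)
       * the incoming VM.
       */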
2135  static void loadvm_postcopy_handle_run_bh(void *opaque)
2136  {
2137      Error *local_err = NULL;
2138      MigrationIncomingState *mis = opaque;
2139  
2140      trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
2141  
2142      /* TODO we should move all of this lot into postcopy_ram.c or shared code
2143       * in migration.c
2144       */
2145      cpu_synchronize_all_post_init();
2146  
2147      trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
2148  
2149      qemu_announce_self(&mis->announce_timer, migrate_announce_params());
2150  
2151      trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
2152  
2153      /* Make sure all file formats throw away their mutable metadata.
2154       * If we get an error here, just don't restart the VM yet. */
2155      bdrv_activate_all(&local_err);
2156      if (local_err) {
2157          error_report_err(local_err);
2158          local_err = NULL;
2159          autostart = false;
2160      }
2161  
2162      trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
2163  
2164      dirty_bitmap_mig_before_vm_start();
2165  
2166      if (autostart) {
2167          /* Hold onto your hats, starting the CPU */
2168          vm_start();
2169      } else {
2170          /* leave it paused and let management decide when to start the CPU */
2171          runstate_set(RUN_STATE_PAUSED);
2172      }
2173  
2174      qemu_bh_delete(mis->bh);
2175  
2176      trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
2177  }
2178  
2179  /* After all discards we can start running and asking for pages */
2180  static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
2181  {
2182      PostcopyState ps = postcopy_state_get();
2183  
2184      trace_loadvm_postcopy_handle_run();
2185      if (ps != POSTCOPY_INCOMING_LISTENING) {
2186          error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
2187          return -1;
2188      }
2189  
2190      postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
2191      mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
2192      qemu_bh_schedule(mis->bh);
2193  
2194      /* We need to finish reading the stream from the package
2195       * and also stop reading anything more from the stream that loaded the
2196       * package (since it's now being read by the listener thread).
2197       * LOADVM_QUIT will quit all the layers of nested loadvm loops.
2198       */
2199      return LOADVM_QUIT;
2200  }
2201  
2202  /* Must be called with page_request_mutex held */
2203  static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
2204                                         gpointer data)
2205  {
2206      MigrationIncomingState *mis = data;
2207      void *host_addr = (void *) key;
2208      ram_addr_t rb_offset;
2209      RAMBlock *rb;
2210      int ret;
2211  
2212      rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
2213      if (!rb) {
2214          /*
2215           * This should _never_ happen.  However, be nice for a migrating VM
2216           * to not crash/assert.  Post an error (note: intended not to use
2217           * *_once because we do want to see all the illegal addresses; this
2218           * can never be triggered by the guest so we're safe) and move on.
2219           */
2220          error_report("%s: illegal host addr %p", __func__, host_addr);
2221          /* Try the next entry */
2222          return FALSE;
2223      }
2224  
2225      ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
2226      if (ret) {
2227          /* Please refer to above comment. */
2228          error_report("%s: send rp message failed for addr %p",
2229                       __func__, host_addr);
2230          return FALSE;
2231      }
2232  
2233      trace_postcopy_page_req_sync(host_addr);
2234  
2235      return FALSE;
2236  }
2237  
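      /*
       * Re-send a page request for every page still pending in
       * mis->page_requested; used after postcopy recovery, since the source
       * may have lost its request queue while the network was down.
       */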
2238  static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
2239  {
2240      WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
2241          g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
2242      }
2243  }
2244  
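      /*
       * Handle MIG_CMD_POSTCOPY_RESUME: the source has reconnected after a
       * postcopy failure, so switch back to POSTCOPY_ACTIVE, ack the source,
       * re-sync the outstanding page requests and wake the fault thread(s).
       */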
2245  static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
2246  {
2247      if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
2248          error_report("%s: illegal resume received", __func__);
2249          /* Don't fail the load, only for this. */
2250          return 0;
2251      }
2252  
2253      /*
2254       * Reset the last_rb before we resend any page req to source again, since
2255       * the source should have it reset already.
2256       */
2257      mis->last_rb = NULL;
2258  
2259      /*
2260       * This means source VM is ready to resume the postcopy migration.
2261       */
2262      migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2263                        MIGRATION_STATUS_POSTCOPY_ACTIVE);
2264  
2265      trace_loadvm_postcopy_handle_resume();
2266  
2267      /* Tell source that "we are ready" */
2268      migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
2269  
2270      /*
2271       * After a postcopy recovery, the source should have lost the postcopy
2272       * queue, or potentially the requested pages could have been lost during
2273       * the network down phase.  Let's re-sync with the source VM by re-sending
2274       * all the pending pages that we eagerly need, so these threads won't get
2275       * blocked too long due to the recovery.
2276       *
2277       * Without this procedure, the faulted destination VM threads (waiting for
2278       * page requests right before the postcopy is interrupted) can keep hanging
2279       * until the pages are sent by the source during the background copying of
2280       * pages, or until another thread accidentally faults on the same address.
2281       */
2282      migrate_send_rp_req_pages_pending(mis);
2283  
2284      /*
2285       * It's time to switch state and release the fault thread to continue
2286       * service page faults.  Note that this should be explicitly after the
2287       * above call to migrate_send_rp_req_pages_pending().  In short:
2288       * migrate_send_rp_message_req_pages() is not thread safe, yet.
2289       */
2290      qemu_sem_post(&mis->postcopy_pause_sem_fault);
2291  
2292      if (migrate_postcopy_preempt()) {
2293          /*
2294           * The preempt channel is created asynchronously, so wait here
2295           * and make sure it has been created.
2296           */
2297          qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
2298          assert(mis->postcopy_qemufile_dst);
2299          /* Kick the fast ram load thread too */
2300          qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
2301      }
2302  
2303      return 0;
2304  }
2305  
2306  /**
2307   * Immediately following this command is a blob of data containing an embedded
2308   * chunk of migration stream; read it and load it.
2309   *
2310   * @mis: Incoming state
2311   * The length of the packaged data is read from the stream itself.
2312   *
2313   * Returns: 0 or LOADVM_QUIT on success, negative values on error
2314   *
2315   */
2316  static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
2317  {
2318      int ret;
2319      size_t length;
2320      QIOChannelBuffer *bioc;
2321  
2322      length = qemu_get_be32(mis->from_src_file);
2323      trace_loadvm_handle_cmd_packaged(length);
2324  
2325      if (length > MAX_VM_CMD_PACKAGED_SIZE) {
2326          error_report("Unreasonably large packaged state: %zu", length);
2327          return -1;
2328      }
2329  
2330      bioc = qio_channel_buffer_new(length);
2331      qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
2332      ret = qemu_get_buffer(mis->from_src_file,
2333                            bioc->data,
2334                            length);
2335      if (ret != length) {
2336          object_unref(OBJECT(bioc));
2337          error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
2338                       ret, length);
2339          return (ret < 0) ? ret : -EAGAIN;
2340      }
2341      bioc->usage += length;
2342      trace_loadvm_handle_cmd_packaged_received(ret);
2343  
2344      QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
2345  
2346      ret = qemu_loadvm_state_main(packf, mis);
2347      trace_loadvm_handle_cmd_packaged_main(ret);
2348      qemu_fclose(packf);
2349      object_unref(OBJECT(bioc));
2350  
2351      return ret;
2352  }
2353  
2354  /*
2355   * Handle a request from the source for the recved_bitmap on the
2356   * destination. Payload format:
2357   *
2358   * len (1 byte) + ramblock_name (<255 bytes)
2359   */
2360  static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
2361                                       uint16_t len)
2362  {
2363      QEMUFile *file = mis->from_src_file;
2364      RAMBlock *rb;
2365      char block_name[256];
2366      size_t cnt;
2367  
2368      cnt = qemu_get_counted_string(file, block_name);
2369      if (!cnt) {
2370          error_report("%s: failed to read block name", __func__);
2371          return -EINVAL;
2372      }
2373  
2374      /* Validate before using the data */
2375      if (qemu_file_get_error(file)) {
2376          return qemu_file_get_error(file);
2377      }
2378  
2379      if (len != cnt + 1) {
2380          error_report("%s: invalid payload length (%d)", __func__, len);
2381          return -EINVAL;
2382      }
2383  
2384      rb = qemu_ram_block_by_name(block_name);
2385      if (!rb) {
2386          error_report("%s: block '%s' not found", __func__, block_name);
2387          return -EINVAL;
2388      }
2389  
2390      migrate_send_rp_recv_bitmap(mis, block_name);
2391  
2392      trace_loadvm_handle_recv_bitmap(block_name);
2393  
2394      return 0;
2395  }
2396  
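      /*
       * Handle MIG_CMD_ENABLE_COLO: switch the incoming side into COLO mode
       * and set up the RAM cache, backing COLO out again if the cache cannot
       * be initialized.
       */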
2397  static int loadvm_process_enable_colo(MigrationIncomingState *mis)
2398  {
2399      int ret = migration_incoming_enable_colo();
2400  
2401      if (!ret) {
2402          ret = colo_init_ram_cache();
2403          if (ret) {
2404              migration_incoming_disable_colo();
2405          }
2406      }
2407      return ret;
2408  }
2409  
2410  /*
2411   * Process an incoming 'QEMU_VM_COMMAND'
2412   * 0           just a normal return
2413   * LOADVM_QUIT All good, but exit the loop
2414   * <0          Error
2415   */
2416  static int loadvm_process_command(QEMUFile *f)
2417  {
2418      MigrationIncomingState *mis = migration_incoming_get_current();
2419      uint16_t cmd;
2420      uint16_t len;
2421      uint32_t tmp32;
2422  
2423      cmd = qemu_get_be16(f);
2424      len = qemu_get_be16(f);
2425  
2426      /* Check validity before continuing to process the command */
2427      if (qemu_file_get_error(f)) {
2428          return qemu_file_get_error(f);
2429      }
2430  
2431      if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
2432          error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
2433          return -EINVAL;
2434      }
2435  
2436      trace_loadvm_process_command(mig_cmd_args[cmd].name, len);
2437  
2438      if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
2439          error_report("%s received with bad length - expecting %zu, got %d",
2440                       mig_cmd_args[cmd].name,
2441                       (size_t)mig_cmd_args[cmd].len, len);
2442          return -ERANGE;
2443      }
2444  
2445      switch (cmd) {
2446      case MIG_CMD_OPEN_RETURN_PATH:
2447          if (mis->to_src_file) {
2448              error_report("CMD_OPEN_RETURN_PATH called when RP already open");
2449              /* Not really a problem, so don't give up */
2450              return 0;
2451          }
2452          mis->to_src_file = qemu_file_get_return_path(f);
2453          if (!mis->to_src_file) {
2454              error_report("CMD_OPEN_RETURN_PATH failed");
2455              return -1;
2456          }
2457  
2458          /*
2459           * Switchover ack is enabled but no device uses it, so send an ACK
2460           * to the source that it's OK to switch over. Do it here, after the
2461           * return path has been created.
2462           */
2463          if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
2464              int ret = migrate_send_rp_switchover_ack(mis);
2465              if (ret) {
2466                  error_report(
2467                      "Could not send switchover ack RP MSG, err %d (%s)", ret,
2468                      strerror(-ret));
2469                  return ret;
2470              }
2471          }
2472          break;
2473  
2474      case MIG_CMD_PING:
2475          tmp32 = qemu_get_be32(f);
2476          trace_loadvm_process_command_ping(tmp32);
2477          if (!mis->to_src_file) {
2478              error_report("CMD_PING (0x%x) received with no return path",
2479                           tmp32);
2480              return -1;
2481          }
2482          migrate_send_rp_pong(mis, tmp32);
2483          break;
2484  
2485      case MIG_CMD_PACKAGED:
2486          return loadvm_handle_cmd_packaged(mis);
2487  
2488      case MIG_CMD_POSTCOPY_ADVISE:
2489          return loadvm_postcopy_handle_advise(mis, len);
2490  
2491      case MIG_CMD_POSTCOPY_LISTEN:
2492          return loadvm_postcopy_handle_listen(mis);
2493  
2494      case MIG_CMD_POSTCOPY_RUN:
2495          return loadvm_postcopy_handle_run(mis);
2496  
2497      case MIG_CMD_POSTCOPY_RAM_DISCARD:
2498          return loadvm_postcopy_ram_handle_discard(mis, len);
2499  
2500      case MIG_CMD_POSTCOPY_RESUME:
2501          return loadvm_postcopy_handle_resume(mis);
2502  
2503      case MIG_CMD_RECV_BITMAP:
2504          return loadvm_handle_recv_bitmap(mis, len);
2505  
2506      case MIG_CMD_ENABLE_COLO:
2507          return loadvm_process_enable_colo(mis);
2508      }
2509  
2510      return 0;
2511  }
2512  
2513  /*
2514   * Read a footer off the wire and check that it matches the expected section
2515   *
2516   * Returns: true if the footer was good
2517   *          false if there is a problem (and calls error_report to say why)
2518   */
2519  static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
2520  {
2521      int ret;
2522      uint8_t read_mark;
2523      uint32_t read_section_id;
2524  
2525      if (!migrate_get_current()->send_section_footer) {
2526          /* No footer to check */
2527          return true;
2528      }
2529  
2530      read_mark = qemu_get_byte(f);
2531  
2532      ret = qemu_file_get_error(f);
2533      if (ret) {
2534          error_report("%s: Read section footer failed: %d",
2535                       __func__, ret);
2536          return false;
2537      }
2538  
2539      if (read_mark != QEMU_VM_SECTION_FOOTER) {
2540          error_report("Missing section footer for %s", se->idstr);
2541          return false;
2542      }
2543  
2544      read_section_id = qemu_get_be32(f);
2545      if (read_section_id != se->load_section_id) {
2546          error_report("Mismatched section id in footer for %s -"
2547                       " read 0x%x expected 0x%x",
2548                       se->idstr, read_section_id, se->load_section_id);
2549          return false;
2550      }
2551  
2552      /* All good */
2553      return true;
2554  }
2555  
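      /*
       * Load a QEMU_VM_SECTION_START or QEMU_VM_SECTION_FULL section: read
       * the header (section id, ID string, instance and version ids), find
       * the matching SaveStateEntry, validate the version, then load the
       * state and check the section footer.
       */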
2556  static int
2557  qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
2558                                 uint8_t type)
2559  {
2560      bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
2561      uint32_t instance_id, version_id, section_id;
2562      int64_t start_ts, end_ts;
2563      SaveStateEntry *se;
2564      char idstr[256];
2565      int ret;
2566  
2567      /* Read section start */
2568      section_id = qemu_get_be32(f);
2569      if (!qemu_get_counted_string(f, idstr)) {
2570          error_report("Unable to read ID string for section %u",
2571                       section_id);
2572          return -EINVAL;
2573      }
2574      instance_id = qemu_get_be32(f);
2575      version_id = qemu_get_be32(f);
2576  
2577      ret = qemu_file_get_error(f);
2578      if (ret) {
2579          error_report("%s: Failed to read instance/version ID: %d",
2580                       __func__, ret);
2581          return ret;
2582      }
2583  
2584      trace_qemu_loadvm_state_section_startfull(section_id, idstr,
2585              instance_id, version_id);
2586      /* Find savevm section */
2587      se = find_se(idstr, instance_id);
2588      if (se == NULL) {
2589          error_report("Unknown savevm section or instance '%s' %"PRIu32". "
2590                       "Make sure that your current VM setup matches your "
2591                       "saved VM setup, including any hotplugged devices",
2592                       idstr, instance_id);
2593          return -EINVAL;
2594      }
2595  
2596      /* Validate version */
2597      if (version_id > se->version_id) {
2598          error_report("savevm: unsupported version %d for '%s' v%d",
2599                       version_id, idstr, se->version_id);
2600          return -EINVAL;
2601      }
2602      se->load_version_id = version_id;
2603      se->load_section_id = section_id;
2604  
2605      /* Validate if it is a device's state */
2606      if (xen_enabled() && se->is_ram) {
2607          error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
2608          return -EINVAL;
2609      }
2610  
2611      if (trace_downtime) {
2612          start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2613      }
2614  
2615      ret = vmstate_load(f, se);
2616      if (ret < 0) {
2617          error_report("error while loading state for instance 0x%"PRIx32" of"
2618                       " device '%s'", instance_id, idstr);
2619          return ret;
2620      }
2621  
2622      if (trace_downtime) {
2623          end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2624          trace_vmstate_downtime_load("non-iterable", se->idstr,
2625                                      se->instance_id, end_ts - start_ts);
2626      }
2627  
2628      if (!check_section_footer(f, se)) {
2629          return -EINVAL;
2630      }
2631  
2632      return 0;
2633  }
2634  
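      /*
       * Load a QEMU_VM_SECTION_PART or QEMU_VM_SECTION_END section: these
       * carry only a section id, which must match one previously seen in a
       * START/FULL section for the same device.
       */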
2635  static int
2636  qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
2637                               uint8_t type)
2638  {
2639      bool trace_downtime = (type == QEMU_VM_SECTION_END);
2640      int64_t start_ts, end_ts;
2641      uint32_t section_id;
2642      SaveStateEntry *se;
2643      int ret;
2644  
2645      section_id = qemu_get_be32(f);
2646  
2647      ret = qemu_file_get_error(f);
2648      if (ret) {
2649          error_report("%s: Failed to read section ID: %d",
2650                       __func__, ret);
2651          return ret;
2652      }
2653  
2654      trace_qemu_loadvm_state_section_partend(section_id);
2655      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2656          if (se->load_section_id == section_id) {
2657              break;
2658          }
2659      }
2660      if (se == NULL) {
2661          error_report("Unknown savevm section %d", section_id);
2662          return -EINVAL;
2663      }
2664  
2665      if (trace_downtime) {
2666          start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2667      }
2668  
2669      ret = vmstate_load(f, se);
2670      if (ret < 0) {
2671          error_report("error while loading state section id %d(%s)",
2672                       section_id, se->idstr);
2673          return ret;
2674      }
2675  
2676      if (trace_downtime) {
2677          end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
2678          trace_vmstate_downtime_load("iterable", se->idstr,
2679                                      se->instance_id, end_ts - start_ts);
2680      }
2681  
2682      if (!check_section_footer(f, se)) {
2683          return -EINVAL;
2684      }
2685  
2686      return 0;
2687  }
2688  
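      /*
       * Read and validate the start of the migration stream: the file magic,
       * the stream version and, if the source sends one, the configuration
       * section.
       */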
2689  static int qemu_loadvm_state_header(QEMUFile *f)
2690  {
2691      unsigned int v;
2692      int ret;
2693  
2694      v = qemu_get_be32(f);
2695      if (v != QEMU_VM_FILE_MAGIC) {
2696          error_report("Not a migration stream");
2697          return -EINVAL;
2698      }
2699  
2700      v = qemu_get_be32(f);
2701      if (v == QEMU_VM_FILE_VERSION_COMPAT) {
2702          error_report("SaveVM v2 format is obsolete and don't work anymore");
2703          return -ENOTSUP;
2704      }
2705      if (v != QEMU_VM_FILE_VERSION) {
2706          error_report("Unsupported migration stream version");
2707          return -ENOTSUP;
2708      }
2709  
2710      if (migrate_get_current()->send_configuration) {
2711          if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
2712              error_report("Configuration section missing");
2713              qemu_loadvm_state_cleanup();
2714              return -EINVAL;
2715          }
2716          ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
2717  
2718          if (ret) {
2719              qemu_loadvm_state_cleanup();
2720              return ret;
2721          }
2722      }
2723      return 0;
2724  }
2725  
2726  static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
2727  {
2728      SaveStateEntry *se;
2729  
2730      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2731          if (!se->ops || !se->ops->switchover_ack_needed) {
2732              continue;
2733          }
2734  
2735          if (se->ops->switchover_ack_needed(se->opaque)) {
2736              mis->switchover_ack_pending_num++;
2737          }
2738      }
2739  
2740      trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
2741  }
2742  
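      /*
       * Run every active handler's load_setup hook before the main load loop
       * starts; a failure here also marks the stream as being in error.
       */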
2743  static int qemu_loadvm_state_setup(QEMUFile *f)
2744  {
2745      SaveStateEntry *se;
2746      int ret;
2747  
2748      trace_loadvm_state_setup();
2749      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2750          if (!se->ops || !se->ops->load_setup) {
2751              continue;
2752          }
2753          if (se->ops->is_active) {
2754              if (!se->ops->is_active(se->opaque)) {
2755                  continue;
2756              }
2757          }
2758  
2759          ret = se->ops->load_setup(f, se->opaque);
2760          if (ret < 0) {
2761              qemu_file_set_error(f, ret);
2762              error_report("Load state of device %s failed", se->idstr);
2763              return ret;
2764          }
2765      }
2766      return 0;
2767  }
2768  
2769  void qemu_loadvm_state_cleanup(void)
2770  {
2771      SaveStateEntry *se;
2772  
2773      trace_loadvm_state_cleanup();
2774      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2775          if (se->ops && se->ops->load_cleanup) {
2776              se->ops->load_cleanup(se->opaque);
2777          }
2778      }
2779  }
2780  
2781  /* Return true if we should continue the migration, false otherwise. */
2782  static bool postcopy_pause_incoming(MigrationIncomingState *mis)
2783  {
2784      int i;
2785  
2786      trace_postcopy_pause_incoming();
2787  
2788      assert(migrate_postcopy_ram());
2789  
2790      /*
2791       * Unregistering yank with either the from-src or to-src file would
2792       * work, since the ioc behind them is the same
2793       */
2794      migration_ioc_unregister_yank_from_file(mis->from_src_file);
2795  
2796      assert(mis->from_src_file);
2797      qemu_file_shutdown(mis->from_src_file);
2798      qemu_fclose(mis->from_src_file);
2799      mis->from_src_file = NULL;
2800  
2801      assert(mis->to_src_file);
2802      qemu_file_shutdown(mis->to_src_file);
2803      qemu_mutex_lock(&mis->rp_mutex);
2804      qemu_fclose(mis->to_src_file);
2805      mis->to_src_file = NULL;
2806      qemu_mutex_unlock(&mis->rp_mutex);
2807  
2808      /*
2809       * NOTE: this must happen before resetting the PostcopyTmpPages below;
2810       * otherwise it's racy to reset those fields while the fast load thread
2811       * may still be accessing them in parallel.
2812       */
2813      if (mis->postcopy_qemufile_dst) {
2814          qemu_file_shutdown(mis->postcopy_qemufile_dst);
2815          /* Take the mutex to make sure the fast ram load thread halted */
2816          qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
2817          migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
2818          qemu_fclose(mis->postcopy_qemufile_dst);
2819          mis->postcopy_qemufile_dst = NULL;
2820          qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
2821      }
2822  
2823      /* Current state can be either ACTIVE or RECOVER */
2824      migrate_set_state(&mis->state, mis->state,
2825                        MIGRATION_STATUS_POSTCOPY_PAUSED);
2826  
2827      /* Notify the fault thread for the invalidated file handle */
2828      postcopy_fault_thread_notify(mis);
2829  
2830      /*
2831       * If the network is interrupted, any temp pages we received will be
2832       * useless because we didn't mark them as "received" in receivedmap.
2833       * After a proper recovery later (which will sync the src dirty bitmap
2834       * with receivedmap on dest) these cached small pages will be resent.
2835       */
2836      for (i = 0; i < mis->postcopy_channels; i++) {
2837          postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
2838      }
2839  
2840      error_report("Detected IO failure for postcopy. "
2841                   "Migration paused.");
2842  
2843      while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2844          qemu_sem_wait(&mis->postcopy_pause_sem_dst);
2845      }
2846  
2847      trace_postcopy_pause_incoming_continued();
2848  
2849      return true;
2850  }
2851  
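      /*
       * Main incoming loop: consume the stream as a sequence of sections and
       * commands until EOF, roughly
       *
       *   { SECTION_START | SECTION_PART | SECTION_END | SECTION_FULL |
       *     COMMAND } ... QEMU_VM_EOF
       *
       * Returns 0 on a clean EOF, LOADVM_QUIT if a command asked us to stop
       * reading this channel, or a negative error.  On failure during an
       * active RAM postcopy we pause and, after a successful recovery, retry
       * on the newly created channel rather than bailing out.
       */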
2852  int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
2853  {
2854      uint8_t section_type;
2855      int ret = 0;
2856  
2857  retry:
2858      while (true) {
2859          section_type = qemu_get_byte(f);
2860  
2861          ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
2862          if (ret) {
2863              break;
2864          }
2865  
2866          trace_qemu_loadvm_state_section(section_type);
2867          switch (section_type) {
2868          case QEMU_VM_SECTION_START:
2869          case QEMU_VM_SECTION_FULL:
2870              ret = qemu_loadvm_section_start_full(f, mis, section_type);
2871              if (ret < 0) {
2872                  goto out;
2873              }
2874              break;
2875          case QEMU_VM_SECTION_PART:
2876          case QEMU_VM_SECTION_END:
2877              ret = qemu_loadvm_section_part_end(f, mis, section_type);
2878              if (ret < 0) {
2879                  goto out;
2880              }
2881              break;
2882          case QEMU_VM_COMMAND:
2883              ret = loadvm_process_command(f);
2884              trace_qemu_loadvm_state_section_command(ret);
2885              if ((ret < 0) || (ret == LOADVM_QUIT)) {
2886                  goto out;
2887              }
2888              break;
2889          case QEMU_VM_EOF:
2890              /* This is the end of migration */
2891              goto out;
2892          default:
2893              error_report("Unknown savevm section type %d", section_type);
2894              ret = -EINVAL;
2895              goto out;
2896          }
2897      }
2898  
2899  out:
2900      if (ret < 0) {
2901          qemu_file_set_error(f, ret);
2902  
2903          /* Cancel bitmaps incoming regardless of recovery */
2904          dirty_bitmap_mig_cancel_incoming();
2905  
2906          /*
2907           * If we are in an active postcopy, then we pause instead of
2908           * bailing out, to at least keep the VM's dirty data.  Note
2909           * that the POSTCOPY_INCOMING_LISTENING stage is still not enough,
2910           * since during it we're still receiving device states and we
2911           * still haven't started the VM on the destination.
2912           *
2913           * Only RAM postcopy supports recovery. Still, if RAM postcopy is
2914           * enabled, a cancelled bitmaps postcopy will not affect RAM
2915           * postcopy recovery.
2916           */
2917          if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2918              migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
2919              /* Reset f to point to the newly created channel */
2920              f = mis->from_src_file;
2921              goto retry;
2922          }
2923      }
2924      return ret;
2925  }
2926  
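      /*
       * Top-level entry point for loading a full VM state from @f: validate
       * the header, run load_setup, drive the main loop, then (unless a
       * postcopy listen thread has taken over the channel) drain the
       * optional VMDESC section and run the load_cleanup hooks.
       */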
2927  int qemu_loadvm_state(QEMUFile *f)
2928  {
2929      MigrationIncomingState *mis = migration_incoming_get_current();
2930      Error *local_err = NULL;
2931      int ret;
2932  
2933      if (qemu_savevm_state_blocked(&local_err)) {
2934          error_report_err(local_err);
2935          return -EINVAL;
2936      }
2937  
2938      ret = qemu_loadvm_state_header(f);
2939      if (ret) {
2940          return ret;
2941      }
2942  
2943      if (qemu_loadvm_state_setup(f) != 0) {
2944          return -EINVAL;
2945      }
2946  
2947      if (migrate_switchover_ack()) {
2948          qemu_loadvm_state_switchover_ack_needed(mis);
2949      }
2950  
2951      cpu_synchronize_all_pre_loadvm();
2952  
2953      ret = qemu_loadvm_state_main(f, mis);
2954      qemu_event_set(&mis->main_thread_load_event);
2955  
2956      trace_qemu_loadvm_state_post_main(ret);
2957  
2958      if (mis->have_listen_thread) {
2959          /* Listen thread still going, can't clean up yet */
2960          return ret;
2961      }
2962  
2963      if (ret == 0) {
2964          ret = qemu_file_get_error(f);
2965      }
2966  
2967      /*
2968       * Try to read in the VMDESC section as well, so that dumping tools that
2969       * intercept our migration stream have the chance to see it.
2970       */
2971  
2972      /* We've got to be careful; if we don't read the data and just shut the fd
2973       * then the sender can error if we close while it's still sending.
2974       * We also mustn't read data that isn't there; some transports (RDMA)
2975       * will stall waiting for that data when the source has already closed.
2976       */
2977      if (ret == 0 && should_send_vmdesc()) {
2978          uint8_t *buf;
2979          uint32_t size;
2980          uint8_t  section_type = qemu_get_byte(f);
2981  
2982          if (section_type != QEMU_VM_VMDESCRIPTION) {
2983              error_report("Expected vmdescription section, but got %d",
2984                           section_type);
2985              /*
2986               * It doesn't seem worth failing at this point since
2987               * we apparently have an otherwise valid VM state
2988               */
2989          } else {
2990              buf = g_malloc(0x1000);
2991              size = qemu_get_be32(f);
2992  
2993              while (size > 0) {
2994                  uint32_t read_chunk = MIN(size, 0x1000);
2995                  qemu_get_buffer(f, buf, read_chunk);
2996                  size -= read_chunk;
2997              }
2998              g_free(buf);
2999          }
3000      }
3001  
3002      qemu_loadvm_state_cleanup();
3003      cpu_synchronize_all_post_init();
3004  
3005      return ret;
3006  }
3007  
3008  int qemu_load_device_state(QEMUFile *f)
3009  {
3010      MigrationIncomingState *mis = migration_incoming_get_current();
3011      int ret;
3012  
3013      /* Load QEMU_VM_SECTION_FULL section */
3014      ret = qemu_loadvm_state_main(f, mis);
3015      if (ret < 0) {
3016          error_report("Failed to load device state: %d", ret);
3017          return ret;
3018      }
3019  
3020      cpu_synchronize_all_post_init();
3021      return 0;
3022  }
3023  
3024  int qemu_loadvm_approve_switchover(void)
3025  {
3026      MigrationIncomingState *mis = migration_incoming_get_current();
3027  
3028      if (!mis->switchover_ack_pending_num) {
3029          return -EINVAL;
3030      }
3031  
3032      mis->switchover_ack_pending_num--;
3033      trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
3034  
3035      if (mis->switchover_ack_pending_num) {
3036          return 0;
3037      }
3038  
3039      return migrate_send_rp_switchover_ack(mis);
3040  }
3041  
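      /*
       * Create an internal snapshot across the relevant block devices: stop
       * the VM, write the VM state to the node picked by
       * bdrv_all_find_vmstate_bs(), then snapshot every device under @name
       * (or a generated timestamp name), deleting any partially created
       * snapshots on failure.  Returns true on success.
       */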
3042  bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
3043                    bool has_devices, strList *devices, Error **errp)
3044  {
3045      BlockDriverState *bs;
3046      QEMUSnapshotInfo sn1, *sn = &sn1;
3047      int ret = -1, ret2;
3048      QEMUFile *f;
3049      int saved_vm_running;
3050      uint64_t vm_state_size;
3051      g_autoptr(GDateTime) now = g_date_time_new_now_local();
3052  
3053      GLOBAL_STATE_CODE();
3054  
3055      if (migration_is_blocked(errp)) {
3056          return false;
3057      }
3058  
3059      if (!replay_can_snapshot()) {
3060          error_setg(errp, "Record/replay does not allow making snapshot "
3061                     "right now. Try once more later.");
3062          return false;
3063      }
3064  
3065      if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3066          return false;
3067      }
3068  
3069      /* Delete old snapshots of the same name */
3070      if (name) {
3071          if (overwrite) {
3072              if (bdrv_all_delete_snapshot(name, has_devices,
3073                                           devices, errp) < 0) {
3074                  return false;
3075              }
3076          } else {
3077              ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
3078              if (ret2 < 0) {
3079                  return false;
3080              }
3081              if (ret2 == 1) {
3082                  error_setg(errp,
3083                             "Snapshot '%s' already exists in one or more devices",
3084                             name);
3085                  return false;
3086              }
3087          }
3088      }
3089  
3090      bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
3091      if (bs == NULL) {
3092          return false;
3093      }
3094  
3095      saved_vm_running = runstate_is_running();
3096  
3097      global_state_store();
3098      vm_stop(RUN_STATE_SAVE_VM);
3099  
3100      bdrv_drain_all_begin();
3101  
3102      memset(sn, 0, sizeof(*sn));
3103  
3104      /* fill auxiliary fields */
3105      sn->date_sec = g_date_time_to_unix(now);
3106      sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
3107      sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
3108      if (replay_mode != REPLAY_MODE_NONE) {
3109          sn->icount = replay_get_current_icount();
3110      } else {
3111          sn->icount = -1ULL;
3112      }
3113  
3114      if (name) {
3115          pstrcpy(sn->name, sizeof(sn->name), name);
3116      } else {
3117          g_autofree char *autoname = g_date_time_format(now,  "vm-%Y%m%d%H%M%S");
3118          pstrcpy(sn->name, sizeof(sn->name), autoname);
3119      }
3120  
3121      /* save the VM state */
3122      f = qemu_fopen_bdrv(bs, 1);
3123      if (!f) {
3124          error_setg(errp, "Could not open VM state file");
3125          goto the_end;
3126      }
3127      ret = qemu_savevm_state(f, errp);
3128      vm_state_size = qemu_file_transferred(f);
3129      ret2 = qemu_fclose(f);
3130      if (ret < 0) {
3131          goto the_end;
3132      }
3133      if (ret2 < 0) {
3134          ret = ret2;
3135          goto the_end;
3136      }
3137  
3138      ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
3139                                     has_devices, devices, errp);
3140      if (ret < 0) {
3141          bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
3142          goto the_end;
3143      }
3144  
3145      ret = 0;
3146  
3147   the_end:
3148      bdrv_drain_all_end();
3149  
3150      if (saved_vm_running) {
3151          vm_start();
3152      }
3153      return ret == 0;
3154  }
3155  
3156  void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
3157                                  Error **errp)
3158  {
3159      QEMUFile *f;
3160      QIOChannelFile *ioc;
3161      int saved_vm_running;
3162      int ret;
3163  
3164      if (!has_live) {
3165          /* live defaults to true so old versions of the Xen tool stack can
3166           * have a successful live migration */
3167          live = true;
3168      }
3169  
3170      saved_vm_running = runstate_is_running();
3171      vm_stop(RUN_STATE_SAVE_VM);
3172      global_state_store_running();
3173  
3174      ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC,
3175                                      0660, errp);
3176      if (!ioc) {
3177          goto the_end;
3178      }
3179      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
3180      f = qemu_file_new_output(QIO_CHANNEL(ioc));
3181      object_unref(OBJECT(ioc));
3182      ret = qemu_save_device_state(f);
3183      if (ret < 0 || qemu_fclose(f) < 0) {
3184          error_setg(errp, QERR_IO_ERROR);
3185      } else {
3186          /* libxl calls the QMP command "stop" before calling
3187           * "xen-save-devices-state"; in case of migration failure, libxl
3188           * calls "cont".
3189           * So call bdrv_inactivate_all() here (releasing the image locks)
3190           * to let the other side of the migration take control of the images.
3191           */
3192          if (live && !saved_vm_running) {
3193              ret = bdrv_inactivate_all();
3194              if (ret) {
3195                  error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
3196                             __func__, ret);
3197              }
3198          }
3199      }
3200  
3201   the_end:
3202      if (saved_vm_running) {
3203          vm_start();
3204      }
3205  }
3206  
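/*
 * QMP handler for "xen-load-devices-state": read back a device state
 * file produced by "xen-save-devices-state".  The guest must already
 * be paused; RAM is restored separately by the Xen toolstack.
 */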
3207  void qmp_xen_load_devices_state(const char *filename, Error **errp)
3208  {
3209      QEMUFile *f;
3210      QIOChannelFile *ioc;
3211      int ret;
3212  
3213      /* Guest must be paused before loading the device state; the RAM state
3214       * will already have been loaded by the Xen control library (xc).
3215       */
3216      if (runstate_is_running()) {
3217          error_setg(errp, "Cannot update device state while vm is running");
3218          return;
3219      }
3220      vm_stop(RUN_STATE_RESTORE_VM);
3221  
3222      ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
3223      if (!ioc) {
3224          return;
3225      }
3226      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
3227      f = qemu_file_new_input(QIO_CHANNEL(ioc));
3228      object_unref(OBJECT(ioc));
3229  
3230      ret = qemu_loadvm_state(f);
3231      qemu_fclose(f);
3232      if (ret < 0) {
3233          error_setg(errp, QERR_IO_ERROR);
3234      }
3235      migration_incoming_state_destroy();
3236  }
3237  
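/*
 * Revert all snapshottable block devices (or only @devices, when
 * @has_devices is set) to the internal snapshot @name, then restore
 * the VM state stored alongside it.  Returns true on success, false
 * with @errp set on failure.
 */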
3238  bool load_snapshot(const char *name, const char *vmstate,
3239                     bool has_devices, strList *devices, Error **errp)
3240  {
3241      BlockDriverState *bs_vm_state;
3242      QEMUSnapshotInfo sn;
3243      QEMUFile *f;
3244      int ret;
3245      MigrationIncomingState *mis = migration_incoming_get_current();
3246  
3247      if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3248          return false;
3249      }
3250      ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
3251      if (ret < 0) {
3252          return false;
3253      }
3254      if (ret == 0) {
3255          error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
3256                     name);
3257          return false;
3258      }
3259  
3260      bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
3261      if (!bs_vm_state) {
3262          return false;
3263      }
3264  
3265      /* Don't even try to load empty VM states */
3266      ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
3267      if (ret < 0) {
3268          return false;
3269      } else if (sn.vm_state_size == 0) {
3270          error_setg(errp, "This is a disk-only snapshot. Revert to it "
3271                     "offline using qemu-img");
3272          return false;
3273      }
3274  
3275      /*
3276       * Flush the record/replay queue. The VM state is about to
3277       * change, so we no longer need to preserve its consistency.
3278       */
3279      replay_flush_events();
3280  
3281      /* Flush all IO requests so they don't interfere with the new state. */
3282      bdrv_drain_all_begin();
3283  
3284      ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
3285      if (ret < 0) {
3286          goto err_drain;
3287      }
3288  
3289      /* restore the VM state */
3290      f = qemu_fopen_bdrv(bs_vm_state, 0);
3291      if (!f) {
3292          error_setg(errp, "Could not open VM state file");
3293          goto err_drain;
3294      }
3295  
3296      qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
3297      mis->from_src_file = f;
3298  
3299      if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
3300          ret = -EINVAL;
3301          goto err_drain;
3302      }
3303      ret = qemu_loadvm_state(f);
3304      migration_incoming_state_destroy();
3305  
3306      bdrv_drain_all_end();
3307  
3308      if (ret < 0) {
3309          error_setg(errp, "Error %d while loading VM state", ret);
3310          return false;
3311      }
3312  
3313      return true;
3314  
3315  err_drain:
3316      bdrv_drain_all_end();
3317      return false;
3318  }
3319  
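/*
 * Delete the internal snapshot @name from all snapshottable block
 * devices (or only @devices, when @has_devices is set).  Returns true
 * on success, false with @errp set on failure.
 */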
3320  bool delete_snapshot(const char *name, bool has_devices,
3321                       strList *devices, Error **errp)
3322  {
3323      if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
3324          return false;
3325      }
3326  
3327      if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
3328          return false;
3329      }
3330  
3331      return true;
3332  }
3333  
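/*
 * Give the RAM block backing @mr a stable id string (qualified with
 * @dev when non-NULL) so RAM migration can match source and
 * destination blocks, and mark the block as migratable.
 */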
3334  void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
3335  {
3336      qemu_ram_set_idstr(mr->ram_block,
3337                         memory_region_name(mr), dev);
3338      qemu_ram_set_migratable(mr->ram_block);
3339  }
3340  
3341  void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
3342  {
3343      qemu_ram_unset_idstr(mr->ram_block);
3344      qemu_ram_unset_migratable(mr->ram_block);
3345  }
3346  
3347  void vmstate_register_ram_global(MemoryRegion *mr)
3348  {
3349      vmstate_register_ram(mr, NULL);
3350  }
3351  
3352  bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
3353  {
3354      /* this check is only needed if --only-migratable is specified */
3355      if (!only_migratable) {
3356          return true;
3357      }
3358  
3359      return !(vmsd && vmsd->unmigratable);
3360  }
3361  
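/*
 * State shared between a snapshot QMP handler, the job's coroutine
 * run function and the main-loop bottom half that does the actual
 * work; @ret reports the outcome back to the coroutine.
 */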
3362  typedef struct SnapshotJob {
3363      Job common;
3364      char *tag;
3365      char *vmstate;
3366      strList *devices;
3367      Coroutine *co;
3368      Error **errp;
3369      bool ret;
3370  } SnapshotJob;
3371  
3372  static void qmp_snapshot_job_free(SnapshotJob *s)
3373  {
3374      g_free(s->tag);
3375      g_free(s->vmstate);
3376      qapi_free_strList(s->devices);
3377  }
3378  
3379  
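/*
 * Runs in the main loop: stop the VM, load the snapshot, restart the
 * VM only if it was running before and the load succeeded, then wake
 * the job coroutine that scheduled this bottom half.
 */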
3380  static void snapshot_load_job_bh(void *opaque)
3381  {
3382      Job *job = opaque;
3383      SnapshotJob *s = container_of(job, SnapshotJob, common);
3384      int orig_vm_running;
3385  
3386      job_progress_set_remaining(&s->common, 1);
3387  
3388      orig_vm_running = runstate_is_running();
3389      vm_stop(RUN_STATE_RESTORE_VM);
3390  
3391      s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
3392      if (s->ret && orig_vm_running) {
3393          vm_start();
3394      }
3395  
3396      job_progress_update(&s->common, 1);
3397  
3398      qmp_snapshot_job_free(s);
3399      aio_co_wake(s->co);
3400  }
3401  
3402  static void snapshot_save_job_bh(void *opaque)
3403  {
3404      Job *job = opaque;
3405      SnapshotJob *s = container_of(job, SnapshotJob, common);
3406  
3407      job_progress_set_remaining(&s->common, 1);
3408      s->ret = save_snapshot(s->tag, false, s->vmstate,
3409                             true, s->devices, s->errp);
3410      job_progress_update(&s->common, 1);
3411  
3412      qmp_snapshot_job_free(s);
3413      aio_co_wake(s->co);
3414  }
3415  
3416  static void snapshot_delete_job_bh(void *opaque)
3417  {
3418      Job *job = opaque;
3419      SnapshotJob *s = container_of(job, SnapshotJob, common);
3420  
3421      job_progress_set_remaining(&s->common, 1);
3422      s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
3423      job_progress_update(&s->common, 1);
3424  
3425      qmp_snapshot_job_free(s);
3426      aio_co_wake(s->co);
3427  }
3428  
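/*
 * The run functions below execute in coroutine context, but the
 * snapshot code has to run in the main loop outside of a coroutine.
 * Each one therefore schedules a one-shot bottom half in the main
 * AioContext and yields until that BH wakes it with the result.
 */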
3429  static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
3430  {
3431      SnapshotJob *s = container_of(job, SnapshotJob, common);
3432      s->errp = errp;
3433      s->co = qemu_coroutine_self();
3434      aio_bh_schedule_oneshot(qemu_get_aio_context(),
3435                              snapshot_save_job_bh, job);
3436      qemu_coroutine_yield();
3437      return s->ret ? 0 : -1;
3438  }
3439  
3440  static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
3441  {
3442      SnapshotJob *s = container_of(job, SnapshotJob, common);
3443      s->errp = errp;
3444      s->co = qemu_coroutine_self();
3445      aio_bh_schedule_oneshot(qemu_get_aio_context(),
3446                              snapshot_load_job_bh, job);
3447      qemu_coroutine_yield();
3448      return s->ret ? 0 : -1;
3449  }
3450  
3451  static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
3452  {
3453      SnapshotJob *s = container_of(job, SnapshotJob, common);
3454      s->errp = errp;
3455      s->co = qemu_coroutine_self();
3456      aio_bh_schedule_oneshot(qemu_get_aio_context(),
3457                              snapshot_delete_job_bh, job);
3458      qemu_coroutine_yield();
3459      return s->ret ? 0 : -1;
3460  }
3461  
3462  
3463  static const JobDriver snapshot_load_job_driver = {
3464      .instance_size = sizeof(SnapshotJob),
3465      .job_type      = JOB_TYPE_SNAPSHOT_LOAD,
3466      .run           = snapshot_load_job_run,
3467  };
3468  
3469  static const JobDriver snapshot_save_job_driver = {
3470      .instance_size = sizeof(SnapshotJob),
3471      .job_type      = JOB_TYPE_SNAPSHOT_SAVE,
3472      .run           = snapshot_save_job_run,
3473  };
3474  
3475  static const JobDriver snapshot_delete_job_driver = {
3476      .instance_size = sizeof(SnapshotJob),
3477      .job_type      = JOB_TYPE_SNAPSHOT_DELETE,
3478      .run           = snapshot_delete_job_run,
3479  };
3480  
3481  
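/*
 * QMP handler for "snapshot-save": start a background job writing
 * internal snapshot @tag.  The arguments are copied because the job
 * outlives this handler; JOB_MANUAL_DISMISS means the client must
 * explicitly dismiss the job once it has finished.
 *
 * Example invocation (illustrative values only):
 *   { "execute": "snapshot-save",
 *     "arguments": { "job-id": "save0", "tag": "my-snap",
 *                    "vmstate": "disk0", "devices": [ "disk0" ] } }
 */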
3482  void qmp_snapshot_save(const char *job_id,
3483                         const char *tag,
3484                         const char *vmstate,
3485                         strList *devices,
3486                         Error **errp)
3487  {
3488      SnapshotJob *s;
3489  
3490      s = job_create(job_id, &snapshot_save_job_driver, NULL,
3491                     qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3492                     NULL, NULL, errp);
3493      if (!s) {
3494          return;
3495      }
3496  
3497      s->tag = g_strdup(tag);
3498      s->vmstate = g_strdup(vmstate);
3499      s->devices = QAPI_CLONE(strList, devices);
3500  
3501      job_start(&s->common);
3502  }
3503  
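/*
 * QMP handler for "snapshot-load": start a background job reverting
 * the VM to internal snapshot @tag.
 *
 * Example invocation (illustrative values only):
 *   { "execute": "snapshot-load",
 *     "arguments": { "job-id": "load0", "tag": "my-snap",
 *                    "vmstate": "disk0", "devices": [ "disk0" ] } }
 */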
3504  void qmp_snapshot_load(const char *job_id,
3505                         const char *tag,
3506                         const char *vmstate,
3507                         strList *devices,
3508                         Error **errp)
3509  {
3510      SnapshotJob *s;
3511  
3512      s = job_create(job_id, &snapshot_load_job_driver, NULL,
3513                     qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3514                     NULL, NULL, errp);
3515      if (!s) {
3516          return;
3517      }
3518  
3519      s->tag = g_strdup(tag);
3520      s->vmstate = g_strdup(vmstate);
3521      s->devices = QAPI_CLONE(strList, devices);
3522  
3523      job_start(&s->common);
3524  }
3525  
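/*
 * QMP handler for "snapshot-delete": start a background job removing
 * internal snapshot @tag from the given devices.
 *
 * Example invocation (illustrative values only):
 *   { "execute": "snapshot-delete",
 *     "arguments": { "job-id": "del0", "tag": "my-snap",
 *                    "devices": [ "disk0" ] } }
 */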
3526  void qmp_snapshot_delete(const char *job_id,
3527                           const char *tag,
3528                           strList *devices,
3529                           Error **errp)
3530  {
3531      SnapshotJob *s;
3532  
3533      s = job_create(job_id, &snapshot_delete_job_driver, NULL,
3534                     qemu_get_aio_context(), JOB_MANUAL_DISMISS,
3535                     NULL, NULL, errp);
3536      if (!s) {
3537          return;
3538      }
3539  
3540      s->tag = g_strdup(tag);
3541      s->devices = QAPI_CLONE(strList, devices);
3542  
3543      job_start(&s->common);
3544  }
3545