1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2009-2015 Red Hat Inc 6 * 7 * Authors: 8 * Juan Quintela <quintela@redhat.com> 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 29 #include "qemu/osdep.h" 30 #include "hw/boards.h" 31 #include "net/net.h" 32 #include "migration.h" 33 #include "migration/snapshot.h" 34 #include "migration/vmstate.h" 35 #include "migration/misc.h" 36 #include "migration/register.h" 37 #include "migration/global_state.h" 38 #include "ram.h" 39 #include "qemu-file-channel.h" 40 #include "qemu-file.h" 41 #include "savevm.h" 42 #include "postcopy-ram.h" 43 #include "qapi/error.h" 44 #include "qapi/qapi-commands-migration.h" 45 #include "qapi/qmp/json-writer.h" 46 #include "qapi/qmp/qerror.h" 47 #include "qemu/error-report.h" 48 #include "sysemu/cpus.h" 49 #include "exec/memory.h" 50 #include "exec/target_page.h" 51 #include "trace.h" 52 #include "qemu/iov.h" 53 #include "qemu/main-loop.h" 54 #include "block/snapshot.h" 55 #include "qemu/cutils.h" 56 #include "io/channel-buffer.h" 57 #include "io/channel-file.h" 58 #include "sysemu/replay.h" 59 #include "sysemu/runstate.h" 60 #include "sysemu/sysemu.h" 61 #include "sysemu/xen.h" 62 #include "migration/colo.h" 63 #include "qemu/bitmap.h" 64 #include "net/announce.h" 65 66 const unsigned int postcopy_ram_discard_version; 67 68 /* Subcommands for QEMU_VM_COMMAND */ 69 enum qemu_vm_cmd { 70 MIG_CMD_INVALID = 0, /* Must be 0 */ 71 MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ 72 MIG_CMD_PING, /* Request a PONG on the RP */ 73 74 MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just 75 warn we might want to do PC */ 76 MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming 77 pages as it's running. */ 78 MIG_CMD_POSTCOPY_RUN, /* Start execution */ 79 80 MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that 81 were previously sent during 82 precopy but are dirty. */ 83 MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ 84 MIG_CMD_ENABLE_COLO, /* Enable COLO */ 85 MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ 86 MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ 87 MIG_CMD_MAX 88 }; 89 90 #define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX 91 static struct mig_cmd_args { 92 ssize_t len; /* -1 = variable */ 93 const char *name; 94 } mig_cmd_args[] = { 95 [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" }, 96 [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" }, 97 [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" }, 98 [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" }, 99 [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" }, 100 [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" }, 101 [MIG_CMD_POSTCOPY_RAM_DISCARD] = { 102 .len = -1, .name = "POSTCOPY_RAM_DISCARD" }, 103 [MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" }, 104 [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" }, 105 [MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 106 [MIG_CMD_MAX] = { .len = -1, .name = "MAX" }, 107 }; 108 109 /* Note for MIG_CMD_POSTCOPY_ADVISE: 110 * The format of arguments is depending on postcopy mode: 111 * - postcopy RAM only 112 * uint64_t host page size 113 * uint64_t taget page size 114 * 115 * - postcopy RAM and postcopy dirty bitmaps 116 * format is the same as for postcopy RAM only 117 * 118 * - postcopy dirty bitmaps only 119 * Nothing. Command length field is 0. 120 * 121 * Be careful: adding a new postcopy entity with some other parameters should 122 * not break format self-description ability. Good way is to introduce some 123 * generic extendable format with an exception for two old entities. 124 */ 125 126 /***********************************************************/ 127 /* savevm/loadvm support */ 128 129 static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, 130 int64_t pos, Error **errp) 131 { 132 int ret; 133 QEMUIOVector qiov; 134 135 qemu_iovec_init_external(&qiov, iov, iovcnt); 136 ret = bdrv_writev_vmstate(opaque, &qiov, pos); 137 if (ret < 0) { 138 return ret; 139 } 140 141 return qiov.size; 142 } 143 144 static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, 145 size_t size, Error **errp) 146 { 147 return bdrv_load_vmstate(opaque, buf, pos, size); 148 } 149 150 static int bdrv_fclose(void *opaque, Error **errp) 151 { 152 return bdrv_flush(opaque); 153 } 154 155 static const QEMUFileOps bdrv_read_ops = { 156 .get_buffer = block_get_buffer, 157 .close = bdrv_fclose 158 }; 159 160 static const QEMUFileOps bdrv_write_ops = { 161 .writev_buffer = block_writev_buffer, 162 .close = bdrv_fclose 163 }; 164 165 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) 166 { 167 if (is_writable) { 168 return qemu_fopen_ops(bs, &bdrv_write_ops); 169 } 170 return qemu_fopen_ops(bs, &bdrv_read_ops); 171 } 172 173 174 /* QEMUFile timer support. 175 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c 176 */ 177 178 void timer_put(QEMUFile *f, QEMUTimer *ts) 179 { 180 uint64_t expire_time; 181 182 expire_time = timer_expire_time_ns(ts); 183 qemu_put_be64(f, expire_time); 184 } 185 186 void timer_get(QEMUFile *f, QEMUTimer *ts) 187 { 188 uint64_t expire_time; 189 190 expire_time = qemu_get_be64(f); 191 if (expire_time != -1) { 192 timer_mod_ns(ts, expire_time); 193 } else { 194 timer_del(ts); 195 } 196 } 197 198 199 /* VMState timer support. 200 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c 201 */ 202 203 static int get_timer(QEMUFile *f, void *pv, size_t size, 204 const VMStateField *field) 205 { 206 QEMUTimer *v = pv; 207 timer_get(f, v); 208 return 0; 209 } 210 211 static int put_timer(QEMUFile *f, void *pv, size_t size, 212 const VMStateField *field, JSONWriter *vmdesc) 213 { 214 QEMUTimer *v = pv; 215 timer_put(f, v); 216 217 return 0; 218 } 219 220 const VMStateInfo vmstate_info_timer = { 221 .name = "timer", 222 .get = get_timer, 223 .put = put_timer, 224 }; 225 226 227 typedef struct CompatEntry { 228 char idstr[256]; 229 int instance_id; 230 } CompatEntry; 231 232 typedef struct SaveStateEntry { 233 QTAILQ_ENTRY(SaveStateEntry) entry; 234 char idstr[256]; 235 uint32_t instance_id; 236 int alias_id; 237 int version_id; 238 /* version id read from the stream */ 239 int load_version_id; 240 int section_id; 241 /* section id read from the stream */ 242 int load_section_id; 243 const SaveVMHandlers *ops; 244 const VMStateDescription *vmsd; 245 void *opaque; 246 CompatEntry *compat; 247 int is_ram; 248 } SaveStateEntry; 249 250 typedef struct SaveState { 251 QTAILQ_HEAD(, SaveStateEntry) handlers; 252 SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1]; 253 int global_section_id; 254 uint32_t len; 255 const char *name; 256 uint32_t target_page_bits; 257 uint32_t caps_count; 258 MigrationCapability *capabilities; 259 QemuUUID uuid; 260 } SaveState; 261 262 static SaveState savevm_state = { 263 .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), 264 .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL }, 265 .global_section_id = 0, 266 }; 267 268 static bool should_validate_capability(int capability) 269 { 270 assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX); 271 /* Validate only new capabilities to keep compatibility. */ 272 switch (capability) { 273 case MIGRATION_CAPABILITY_X_IGNORE_SHARED: 274 return true; 275 default: 276 return false; 277 } 278 } 279 280 static uint32_t get_validatable_capabilities_count(void) 281 { 282 MigrationState *s = migrate_get_current(); 283 uint32_t result = 0; 284 int i; 285 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 286 if (should_validate_capability(i) && s->enabled_capabilities[i]) { 287 result++; 288 } 289 } 290 return result; 291 } 292 293 static int configuration_pre_save(void *opaque) 294 { 295 SaveState *state = opaque; 296 const char *current_name = MACHINE_GET_CLASS(current_machine)->name; 297 MigrationState *s = migrate_get_current(); 298 int i, j; 299 300 state->len = strlen(current_name); 301 state->name = current_name; 302 state->target_page_bits = qemu_target_page_bits(); 303 304 state->caps_count = get_validatable_capabilities_count(); 305 state->capabilities = g_renew(MigrationCapability, state->capabilities, 306 state->caps_count); 307 for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 308 if (should_validate_capability(i) && s->enabled_capabilities[i]) { 309 state->capabilities[j++] = i; 310 } 311 } 312 state->uuid = qemu_uuid; 313 314 return 0; 315 } 316 317 static int configuration_pre_load(void *opaque) 318 { 319 SaveState *state = opaque; 320 321 /* If there is no target-page-bits subsection it means the source 322 * predates the variable-target-page-bits support and is using the 323 * minimum possible value for this CPU. 324 */ 325 state->target_page_bits = qemu_target_page_bits_min(); 326 return 0; 327 } 328 329 static bool configuration_validate_capabilities(SaveState *state) 330 { 331 bool ret = true; 332 MigrationState *s = migrate_get_current(); 333 unsigned long *source_caps_bm; 334 int i; 335 336 source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX); 337 for (i = 0; i < state->caps_count; i++) { 338 MigrationCapability capability = state->capabilities[i]; 339 set_bit(capability, source_caps_bm); 340 } 341 342 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 343 bool source_state, target_state; 344 if (!should_validate_capability(i)) { 345 continue; 346 } 347 source_state = test_bit(i, source_caps_bm); 348 target_state = s->enabled_capabilities[i]; 349 if (source_state != target_state) { 350 error_report("Capability %s is %s, but received capability is %s", 351 MigrationCapability_str(i), 352 target_state ? "on" : "off", 353 source_state ? "on" : "off"); 354 ret = false; 355 /* Don't break here to report all failed capabilities */ 356 } 357 } 358 359 g_free(source_caps_bm); 360 return ret; 361 } 362 363 static int configuration_post_load(void *opaque, int version_id) 364 { 365 SaveState *state = opaque; 366 const char *current_name = MACHINE_GET_CLASS(current_machine)->name; 367 368 if (strncmp(state->name, current_name, state->len) != 0) { 369 error_report("Machine type received is '%.*s' and local is '%s'", 370 (int) state->len, state->name, current_name); 371 return -EINVAL; 372 } 373 374 if (state->target_page_bits != qemu_target_page_bits()) { 375 error_report("Received TARGET_PAGE_BITS is %d but local is %d", 376 state->target_page_bits, qemu_target_page_bits()); 377 return -EINVAL; 378 } 379 380 if (!configuration_validate_capabilities(state)) { 381 return -EINVAL; 382 } 383 384 return 0; 385 } 386 387 static int get_capability(QEMUFile *f, void *pv, size_t size, 388 const VMStateField *field) 389 { 390 MigrationCapability *capability = pv; 391 char capability_str[UINT8_MAX + 1]; 392 uint8_t len; 393 int i; 394 395 len = qemu_get_byte(f); 396 qemu_get_buffer(f, (uint8_t *)capability_str, len); 397 capability_str[len] = '\0'; 398 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 399 if (!strcmp(MigrationCapability_str(i), capability_str)) { 400 *capability = i; 401 return 0; 402 } 403 } 404 error_report("Received unknown capability %s", capability_str); 405 return -EINVAL; 406 } 407 408 static int put_capability(QEMUFile *f, void *pv, size_t size, 409 const VMStateField *field, JSONWriter *vmdesc) 410 { 411 MigrationCapability *capability = pv; 412 const char *capability_str = MigrationCapability_str(*capability); 413 size_t len = strlen(capability_str); 414 assert(len <= UINT8_MAX); 415 416 qemu_put_byte(f, len); 417 qemu_put_buffer(f, (uint8_t *)capability_str, len); 418 return 0; 419 } 420 421 static const VMStateInfo vmstate_info_capability = { 422 .name = "capability", 423 .get = get_capability, 424 .put = put_capability, 425 }; 426 427 /* The target-page-bits subsection is present only if the 428 * target page size is not the same as the default (ie the 429 * minimum page size for a variable-page-size guest CPU). 430 * If it is present then it contains the actual target page 431 * bits for the machine, and migration will fail if the 432 * two ends don't agree about it. 433 */ 434 static bool vmstate_target_page_bits_needed(void *opaque) 435 { 436 return qemu_target_page_bits() 437 > qemu_target_page_bits_min(); 438 } 439 440 static const VMStateDescription vmstate_target_page_bits = { 441 .name = "configuration/target-page-bits", 442 .version_id = 1, 443 .minimum_version_id = 1, 444 .needed = vmstate_target_page_bits_needed, 445 .fields = (VMStateField[]) { 446 VMSTATE_UINT32(target_page_bits, SaveState), 447 VMSTATE_END_OF_LIST() 448 } 449 }; 450 451 static bool vmstate_capabilites_needed(void *opaque) 452 { 453 return get_validatable_capabilities_count() > 0; 454 } 455 456 static const VMStateDescription vmstate_capabilites = { 457 .name = "configuration/capabilities", 458 .version_id = 1, 459 .minimum_version_id = 1, 460 .needed = vmstate_capabilites_needed, 461 .fields = (VMStateField[]) { 462 VMSTATE_UINT32_V(caps_count, SaveState, 1), 463 VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1, 464 vmstate_info_capability, 465 MigrationCapability), 466 VMSTATE_END_OF_LIST() 467 } 468 }; 469 470 static bool vmstate_uuid_needed(void *opaque) 471 { 472 return qemu_uuid_set && migrate_validate_uuid(); 473 } 474 475 static int vmstate_uuid_post_load(void *opaque, int version_id) 476 { 477 SaveState *state = opaque; 478 char uuid_src[UUID_FMT_LEN + 1]; 479 char uuid_dst[UUID_FMT_LEN + 1]; 480 481 if (!qemu_uuid_set) { 482 /* 483 * It's warning because user might not know UUID in some cases, 484 * e.g. load an old snapshot 485 */ 486 qemu_uuid_unparse(&state->uuid, uuid_src); 487 warn_report("UUID is received %s, but local uuid isn't set", 488 uuid_src); 489 return 0; 490 } 491 if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) { 492 qemu_uuid_unparse(&state->uuid, uuid_src); 493 qemu_uuid_unparse(&qemu_uuid, uuid_dst); 494 error_report("UUID received is %s and local is %s", uuid_src, uuid_dst); 495 return -EINVAL; 496 } 497 return 0; 498 } 499 500 static const VMStateDescription vmstate_uuid = { 501 .name = "configuration/uuid", 502 .version_id = 1, 503 .minimum_version_id = 1, 504 .needed = vmstate_uuid_needed, 505 .post_load = vmstate_uuid_post_load, 506 .fields = (VMStateField[]) { 507 VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1), 508 VMSTATE_END_OF_LIST() 509 } 510 }; 511 512 static const VMStateDescription vmstate_configuration = { 513 .name = "configuration", 514 .version_id = 1, 515 .pre_load = configuration_pre_load, 516 .post_load = configuration_post_load, 517 .pre_save = configuration_pre_save, 518 .fields = (VMStateField[]) { 519 VMSTATE_UINT32(len, SaveState), 520 VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len), 521 VMSTATE_END_OF_LIST() 522 }, 523 .subsections = (const VMStateDescription *[]) { 524 &vmstate_target_page_bits, 525 &vmstate_capabilites, 526 &vmstate_uuid, 527 NULL 528 } 529 }; 530 531 static void dump_vmstate_vmsd(FILE *out_file, 532 const VMStateDescription *vmsd, int indent, 533 bool is_subsection); 534 535 static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field, 536 int indent) 537 { 538 fprintf(out_file, "%*s{\n", indent, ""); 539 indent += 2; 540 fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name); 541 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "", 542 field->version_id); 543 fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "", 544 field->field_exists ? "true" : "false"); 545 fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size); 546 if (field->vmsd != NULL) { 547 fprintf(out_file, ",\n"); 548 dump_vmstate_vmsd(out_file, field->vmsd, indent, false); 549 } 550 fprintf(out_file, "\n%*s}", indent - 2, ""); 551 } 552 553 static void dump_vmstate_vmss(FILE *out_file, 554 const VMStateDescription **subsection, 555 int indent) 556 { 557 if (*subsection != NULL) { 558 dump_vmstate_vmsd(out_file, *subsection, indent, true); 559 } 560 } 561 562 static void dump_vmstate_vmsd(FILE *out_file, 563 const VMStateDescription *vmsd, int indent, 564 bool is_subsection) 565 { 566 if (is_subsection) { 567 fprintf(out_file, "%*s{\n", indent, ""); 568 } else { 569 fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description"); 570 } 571 indent += 2; 572 fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name); 573 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "", 574 vmsd->version_id); 575 fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "", 576 vmsd->minimum_version_id); 577 if (vmsd->fields != NULL) { 578 const VMStateField *field = vmsd->fields; 579 bool first; 580 581 fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, ""); 582 first = true; 583 while (field->name != NULL) { 584 if (field->flags & VMS_MUST_EXIST) { 585 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */ 586 field++; 587 continue; 588 } 589 if (!first) { 590 fprintf(out_file, ",\n"); 591 } 592 dump_vmstate_vmsf(out_file, field, indent + 2); 593 field++; 594 first = false; 595 } 596 fprintf(out_file, "\n%*s]", indent, ""); 597 } 598 if (vmsd->subsections != NULL) { 599 const VMStateDescription **subsection = vmsd->subsections; 600 bool first; 601 602 fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, ""); 603 first = true; 604 while (*subsection != NULL) { 605 if (!first) { 606 fprintf(out_file, ",\n"); 607 } 608 dump_vmstate_vmss(out_file, subsection, indent + 2); 609 subsection++; 610 first = false; 611 } 612 fprintf(out_file, "\n%*s]", indent, ""); 613 } 614 fprintf(out_file, "\n%*s}", indent - 2, ""); 615 } 616 617 static void dump_machine_type(FILE *out_file) 618 { 619 MachineClass *mc; 620 621 mc = MACHINE_GET_CLASS(current_machine); 622 623 fprintf(out_file, " \"vmschkmachine\": {\n"); 624 fprintf(out_file, " \"Name\": \"%s\"\n", mc->name); 625 fprintf(out_file, " },\n"); 626 } 627 628 void dump_vmstate_json_to_file(FILE *out_file) 629 { 630 GSList *list, *elt; 631 bool first; 632 633 fprintf(out_file, "{\n"); 634 dump_machine_type(out_file); 635 636 first = true; 637 list = object_class_get_list(TYPE_DEVICE, true); 638 for (elt = list; elt; elt = elt->next) { 639 DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data, 640 TYPE_DEVICE); 641 const char *name; 642 int indent = 2; 643 644 if (!dc->vmsd) { 645 continue; 646 } 647 648 if (!first) { 649 fprintf(out_file, ",\n"); 650 } 651 name = object_class_get_name(OBJECT_CLASS(dc)); 652 fprintf(out_file, "%*s\"%s\": {\n", indent, "", name); 653 indent += 2; 654 fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name); 655 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "", 656 dc->vmsd->version_id); 657 fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "", 658 dc->vmsd->minimum_version_id); 659 660 dump_vmstate_vmsd(out_file, dc->vmsd, indent, false); 661 662 fprintf(out_file, "\n%*s}", indent - 2, ""); 663 first = false; 664 } 665 fprintf(out_file, "\n}\n"); 666 fclose(out_file); 667 g_slist_free(list); 668 } 669 670 static uint32_t calculate_new_instance_id(const char *idstr) 671 { 672 SaveStateEntry *se; 673 uint32_t instance_id = 0; 674 675 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 676 if (strcmp(idstr, se->idstr) == 0 677 && instance_id <= se->instance_id) { 678 instance_id = se->instance_id + 1; 679 } 680 } 681 /* Make sure we never loop over without being noticed */ 682 assert(instance_id != VMSTATE_INSTANCE_ID_ANY); 683 return instance_id; 684 } 685 686 static int calculate_compat_instance_id(const char *idstr) 687 { 688 SaveStateEntry *se; 689 int instance_id = 0; 690 691 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 692 if (!se->compat) { 693 continue; 694 } 695 696 if (strcmp(idstr, se->compat->idstr) == 0 697 && instance_id <= se->compat->instance_id) { 698 instance_id = se->compat->instance_id + 1; 699 } 700 } 701 return instance_id; 702 } 703 704 static inline MigrationPriority save_state_priority(SaveStateEntry *se) 705 { 706 if (se->vmsd) { 707 return se->vmsd->priority; 708 } 709 return MIG_PRI_DEFAULT; 710 } 711 712 static void savevm_state_handler_insert(SaveStateEntry *nse) 713 { 714 MigrationPriority priority = save_state_priority(nse); 715 SaveStateEntry *se; 716 int i; 717 718 assert(priority <= MIG_PRI_MAX); 719 720 for (i = priority - 1; i >= 0; i--) { 721 se = savevm_state.handler_pri_head[i]; 722 if (se != NULL) { 723 assert(save_state_priority(se) < priority); 724 break; 725 } 726 } 727 728 if (i >= 0) { 729 QTAILQ_INSERT_BEFORE(se, nse, entry); 730 } else { 731 QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry); 732 } 733 734 if (savevm_state.handler_pri_head[priority] == NULL) { 735 savevm_state.handler_pri_head[priority] = nse; 736 } 737 } 738 739 static void savevm_state_handler_remove(SaveStateEntry *se) 740 { 741 SaveStateEntry *next; 742 MigrationPriority priority = save_state_priority(se); 743 744 if (se == savevm_state.handler_pri_head[priority]) { 745 next = QTAILQ_NEXT(se, entry); 746 if (next != NULL && save_state_priority(next) == priority) { 747 savevm_state.handler_pri_head[priority] = next; 748 } else { 749 savevm_state.handler_pri_head[priority] = NULL; 750 } 751 } 752 QTAILQ_REMOVE(&savevm_state.handlers, se, entry); 753 } 754 755 /* TODO: Individual devices generally have very little idea about the rest 756 of the system, so instance_id should be removed/replaced. 757 Meanwhile pass -1 as instance_id if you do not already have a clearly 758 distinguishing id for all instances of your device class. */ 759 int register_savevm_live(const char *idstr, 760 uint32_t instance_id, 761 int version_id, 762 const SaveVMHandlers *ops, 763 void *opaque) 764 { 765 SaveStateEntry *se; 766 767 se = g_new0(SaveStateEntry, 1); 768 se->version_id = version_id; 769 se->section_id = savevm_state.global_section_id++; 770 se->ops = ops; 771 se->opaque = opaque; 772 se->vmsd = NULL; 773 /* if this is a live_savem then set is_ram */ 774 if (ops->save_setup != NULL) { 775 se->is_ram = 1; 776 } 777 778 pstrcat(se->idstr, sizeof(se->idstr), idstr); 779 780 if (instance_id == VMSTATE_INSTANCE_ID_ANY) { 781 se->instance_id = calculate_new_instance_id(se->idstr); 782 } else { 783 se->instance_id = instance_id; 784 } 785 assert(!se->compat || se->instance_id == 0); 786 savevm_state_handler_insert(se); 787 return 0; 788 } 789 790 void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque) 791 { 792 SaveStateEntry *se, *new_se; 793 char id[256] = ""; 794 795 if (obj) { 796 char *oid = vmstate_if_get_id(obj); 797 if (oid) { 798 pstrcpy(id, sizeof(id), oid); 799 pstrcat(id, sizeof(id), "/"); 800 g_free(oid); 801 } 802 } 803 pstrcat(id, sizeof(id), idstr); 804 805 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { 806 if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { 807 savevm_state_handler_remove(se); 808 g_free(se->compat); 809 g_free(se); 810 } 811 } 812 } 813 814 int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, 815 const VMStateDescription *vmsd, 816 void *opaque, int alias_id, 817 int required_for_version, 818 Error **errp) 819 { 820 SaveStateEntry *se; 821 822 /* If this triggers, alias support can be dropped for the vmsd. */ 823 assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id); 824 825 se = g_new0(SaveStateEntry, 1); 826 se->version_id = vmsd->version_id; 827 se->section_id = savevm_state.global_section_id++; 828 se->opaque = opaque; 829 se->vmsd = vmsd; 830 se->alias_id = alias_id; 831 832 if (obj) { 833 char *id = vmstate_if_get_id(obj); 834 if (id) { 835 if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >= 836 sizeof(se->idstr)) { 837 error_setg(errp, "Path too long for VMState (%s)", id); 838 g_free(id); 839 g_free(se); 840 841 return -1; 842 } 843 g_free(id); 844 845 se->compat = g_new0(CompatEntry, 1); 846 pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); 847 se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? 848 calculate_compat_instance_id(vmsd->name) : instance_id; 849 instance_id = VMSTATE_INSTANCE_ID_ANY; 850 } 851 } 852 pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); 853 854 if (instance_id == VMSTATE_INSTANCE_ID_ANY) { 855 se->instance_id = calculate_new_instance_id(se->idstr); 856 } else { 857 se->instance_id = instance_id; 858 } 859 assert(!se->compat || se->instance_id == 0); 860 savevm_state_handler_insert(se); 861 return 0; 862 } 863 864 void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, 865 void *opaque) 866 { 867 SaveStateEntry *se, *new_se; 868 869 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { 870 if (se->vmsd == vmsd && se->opaque == opaque) { 871 savevm_state_handler_remove(se); 872 g_free(se->compat); 873 g_free(se); 874 } 875 } 876 } 877 878 static int vmstate_load(QEMUFile *f, SaveStateEntry *se) 879 { 880 trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)"); 881 if (!se->vmsd) { /* Old style */ 882 return se->ops->load_state(f, se->opaque, se->load_version_id); 883 } 884 return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id); 885 } 886 887 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, 888 JSONWriter *vmdesc) 889 { 890 int64_t old_offset, size; 891 892 old_offset = qemu_ftell_fast(f); 893 se->ops->save_state(f, se->opaque); 894 size = qemu_ftell_fast(f) - old_offset; 895 896 if (vmdesc) { 897 json_writer_int64(vmdesc, "size", size); 898 json_writer_start_array(vmdesc, "fields"); 899 json_writer_start_object(vmdesc, NULL); 900 json_writer_str(vmdesc, "name", "data"); 901 json_writer_int64(vmdesc, "size", size); 902 json_writer_str(vmdesc, "type", "buffer"); 903 json_writer_end_object(vmdesc); 904 json_writer_end_array(vmdesc); 905 } 906 } 907 908 static int vmstate_save(QEMUFile *f, SaveStateEntry *se, 909 JSONWriter *vmdesc) 910 { 911 trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)"); 912 if (!se->vmsd) { 913 vmstate_save_old_style(f, se, vmdesc); 914 return 0; 915 } 916 return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc); 917 } 918 919 /* 920 * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL) 921 */ 922 static void save_section_header(QEMUFile *f, SaveStateEntry *se, 923 uint8_t section_type) 924 { 925 qemu_put_byte(f, section_type); 926 qemu_put_be32(f, se->section_id); 927 928 if (section_type == QEMU_VM_SECTION_FULL || 929 section_type == QEMU_VM_SECTION_START) { 930 /* ID string */ 931 size_t len = strlen(se->idstr); 932 qemu_put_byte(f, len); 933 qemu_put_buffer(f, (uint8_t *)se->idstr, len); 934 935 qemu_put_be32(f, se->instance_id); 936 qemu_put_be32(f, se->version_id); 937 } 938 } 939 940 /* 941 * Write a footer onto device sections that catches cases misformatted device 942 * sections. 943 */ 944 static void save_section_footer(QEMUFile *f, SaveStateEntry *se) 945 { 946 if (migrate_get_current()->send_section_footer) { 947 qemu_put_byte(f, QEMU_VM_SECTION_FOOTER); 948 qemu_put_be32(f, se->section_id); 949 } 950 } 951 952 /** 953 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the 954 * command and associated data. 955 * 956 * @f: File to send command on 957 * @command: Command type to send 958 * @len: Length of associated data 959 * @data: Data associated with command. 960 */ 961 static void qemu_savevm_command_send(QEMUFile *f, 962 enum qemu_vm_cmd command, 963 uint16_t len, 964 uint8_t *data) 965 { 966 trace_savevm_command_send(command, len); 967 qemu_put_byte(f, QEMU_VM_COMMAND); 968 qemu_put_be16(f, (uint16_t)command); 969 qemu_put_be16(f, len); 970 qemu_put_buffer(f, data, len); 971 qemu_fflush(f); 972 } 973 974 void qemu_savevm_send_colo_enable(QEMUFile *f) 975 { 976 trace_savevm_send_colo_enable(); 977 qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL); 978 } 979 980 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value) 981 { 982 uint32_t buf; 983 984 trace_savevm_send_ping(value); 985 buf = cpu_to_be32(value); 986 qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf); 987 } 988 989 void qemu_savevm_send_open_return_path(QEMUFile *f) 990 { 991 trace_savevm_send_open_return_path(); 992 qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL); 993 } 994 995 /* We have a buffer of data to send; we don't want that all to be loaded 996 * by the command itself, so the command contains just the length of the 997 * extra buffer that we then send straight after it. 998 * TODO: Must be a better way to organise that 999 * 1000 * Returns: 1001 * 0 on success 1002 * -ve on error 1003 */ 1004 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) 1005 { 1006 uint32_t tmp; 1007 1008 if (len > MAX_VM_CMD_PACKAGED_SIZE) { 1009 error_report("%s: Unreasonably large packaged state: %zu", 1010 __func__, len); 1011 return -1; 1012 } 1013 1014 tmp = cpu_to_be32(len); 1015 1016 trace_qemu_savevm_send_packaged(); 1017 qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp); 1018 1019 qemu_put_buffer(f, buf, len); 1020 1021 return 0; 1022 } 1023 1024 /* Send prior to any postcopy transfer */ 1025 void qemu_savevm_send_postcopy_advise(QEMUFile *f) 1026 { 1027 if (migrate_postcopy_ram()) { 1028 uint64_t tmp[2]; 1029 tmp[0] = cpu_to_be64(ram_pagesize_summary()); 1030 tmp[1] = cpu_to_be64(qemu_target_page_size()); 1031 1032 trace_qemu_savevm_send_postcopy_advise(); 1033 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 1034 16, (uint8_t *)tmp); 1035 } else { 1036 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL); 1037 } 1038 } 1039 1040 /* Sent prior to starting the destination running in postcopy, discard pages 1041 * that have already been sent but redirtied on the source. 1042 * CMD_POSTCOPY_RAM_DISCARD consist of: 1043 * byte version (0) 1044 * byte Length of name field (not including 0) 1045 * n x byte RAM block name 1046 * byte 0 terminator (just for safety) 1047 * n x Byte ranges within the named RAMBlock 1048 * be64 Start of the range 1049 * be64 Length 1050 * 1051 * name: RAMBlock name that these entries are part of 1052 * len: Number of page entries 1053 * start_list: 'len' addresses 1054 * length_list: 'len' addresses 1055 * 1056 */ 1057 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, 1058 uint16_t len, 1059 uint64_t *start_list, 1060 uint64_t *length_list) 1061 { 1062 uint8_t *buf; 1063 uint16_t tmplen; 1064 uint16_t t; 1065 size_t name_len = strlen(name); 1066 1067 trace_qemu_savevm_send_postcopy_ram_discard(name, len); 1068 assert(name_len < 256); 1069 buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len); 1070 buf[0] = postcopy_ram_discard_version; 1071 buf[1] = name_len; 1072 memcpy(buf + 2, name, name_len); 1073 tmplen = 2 + name_len; 1074 buf[tmplen++] = '\0'; 1075 1076 for (t = 0; t < len; t++) { 1077 stq_be_p(buf + tmplen, start_list[t]); 1078 tmplen += 8; 1079 stq_be_p(buf + tmplen, length_list[t]); 1080 tmplen += 8; 1081 } 1082 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf); 1083 g_free(buf); 1084 } 1085 1086 /* Get the destination into a state where it can receive postcopy data. */ 1087 void qemu_savevm_send_postcopy_listen(QEMUFile *f) 1088 { 1089 trace_savevm_send_postcopy_listen(); 1090 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL); 1091 } 1092 1093 /* Kick the destination into running */ 1094 void qemu_savevm_send_postcopy_run(QEMUFile *f) 1095 { 1096 trace_savevm_send_postcopy_run(); 1097 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); 1098 } 1099 1100 void qemu_savevm_send_postcopy_resume(QEMUFile *f) 1101 { 1102 trace_savevm_send_postcopy_resume(); 1103 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL); 1104 } 1105 1106 void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name) 1107 { 1108 size_t len; 1109 char buf[256]; 1110 1111 trace_savevm_send_recv_bitmap(block_name); 1112 1113 buf[0] = len = strlen(block_name); 1114 memcpy(buf + 1, block_name, len); 1115 1116 qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf); 1117 } 1118 1119 bool qemu_savevm_state_blocked(Error **errp) 1120 { 1121 SaveStateEntry *se; 1122 1123 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1124 if (se->vmsd && se->vmsd->unmigratable) { 1125 error_setg(errp, "State blocked by non-migratable device '%s'", 1126 se->idstr); 1127 return true; 1128 } 1129 } 1130 return false; 1131 } 1132 1133 void qemu_savevm_state_header(QEMUFile *f) 1134 { 1135 trace_savevm_state_header(); 1136 qemu_put_be32(f, QEMU_VM_FILE_MAGIC); 1137 qemu_put_be32(f, QEMU_VM_FILE_VERSION); 1138 1139 if (migrate_get_current()->send_configuration) { 1140 qemu_put_byte(f, QEMU_VM_CONFIGURATION); 1141 vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); 1142 } 1143 } 1144 1145 bool qemu_savevm_state_guest_unplug_pending(void) 1146 { 1147 SaveStateEntry *se; 1148 1149 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1150 if (se->vmsd && se->vmsd->dev_unplug_pending && 1151 se->vmsd->dev_unplug_pending(se->opaque)) { 1152 return true; 1153 } 1154 } 1155 1156 return false; 1157 } 1158 1159 void qemu_savevm_state_setup(QEMUFile *f) 1160 { 1161 SaveStateEntry *se; 1162 Error *local_err = NULL; 1163 int ret; 1164 1165 trace_savevm_state_setup(); 1166 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1167 if (!se->ops || !se->ops->save_setup) { 1168 continue; 1169 } 1170 if (se->ops->is_active) { 1171 if (!se->ops->is_active(se->opaque)) { 1172 continue; 1173 } 1174 } 1175 save_section_header(f, se, QEMU_VM_SECTION_START); 1176 1177 ret = se->ops->save_setup(f, se->opaque); 1178 save_section_footer(f, se); 1179 if (ret < 0) { 1180 qemu_file_set_error(f, ret); 1181 break; 1182 } 1183 } 1184 1185 if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) { 1186 error_report_err(local_err); 1187 } 1188 } 1189 1190 int qemu_savevm_state_resume_prepare(MigrationState *s) 1191 { 1192 SaveStateEntry *se; 1193 int ret; 1194 1195 trace_savevm_state_resume_prepare(); 1196 1197 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1198 if (!se->ops || !se->ops->resume_prepare) { 1199 continue; 1200 } 1201 if (se->ops->is_active) { 1202 if (!se->ops->is_active(se->opaque)) { 1203 continue; 1204 } 1205 } 1206 ret = se->ops->resume_prepare(s, se->opaque); 1207 if (ret < 0) { 1208 return ret; 1209 } 1210 } 1211 1212 return 0; 1213 } 1214 1215 /* 1216 * this function has three return values: 1217 * negative: there was one error, and we have -errno. 1218 * 0 : We haven't finished, caller have to go again 1219 * 1 : We have finished, we can go to complete phase 1220 */ 1221 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) 1222 { 1223 SaveStateEntry *se; 1224 int ret = 1; 1225 1226 trace_savevm_state_iterate(); 1227 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1228 if (!se->ops || !se->ops->save_live_iterate) { 1229 continue; 1230 } 1231 if (se->ops->is_active && 1232 !se->ops->is_active(se->opaque)) { 1233 continue; 1234 } 1235 if (se->ops->is_active_iterate && 1236 !se->ops->is_active_iterate(se->opaque)) { 1237 continue; 1238 } 1239 /* 1240 * In the postcopy phase, any device that doesn't know how to 1241 * do postcopy should have saved it's state in the _complete 1242 * call that's already run, it might get confused if we call 1243 * iterate afterwards. 1244 */ 1245 if (postcopy && 1246 !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) { 1247 continue; 1248 } 1249 if (qemu_file_rate_limit(f)) { 1250 return 0; 1251 } 1252 trace_savevm_section_start(se->idstr, se->section_id); 1253 1254 save_section_header(f, se, QEMU_VM_SECTION_PART); 1255 1256 ret = se->ops->save_live_iterate(f, se->opaque); 1257 trace_savevm_section_end(se->idstr, se->section_id, ret); 1258 save_section_footer(f, se); 1259 1260 if (ret < 0) { 1261 error_report("failed to save SaveStateEntry with id(name): %d(%s)", 1262 se->section_id, se->idstr); 1263 qemu_file_set_error(f, ret); 1264 } 1265 if (ret <= 0) { 1266 /* Do not proceed to the next vmstate before this one reported 1267 completion of the current stage. This serializes the migration 1268 and reduces the probability that a faster changing state is 1269 synchronized over and over again. */ 1270 break; 1271 } 1272 } 1273 return ret; 1274 } 1275 1276 static bool should_send_vmdesc(void) 1277 { 1278 MachineState *machine = MACHINE(qdev_get_machine()); 1279 bool in_postcopy = migration_in_postcopy(); 1280 return !machine->suppress_vmdesc && !in_postcopy; 1281 } 1282 1283 /* 1284 * Calls the save_live_complete_postcopy methods 1285 * causing the last few pages to be sent immediately and doing any associated 1286 * cleanup. 1287 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete 1288 * all the other devices, but that happens at the point we switch to postcopy. 1289 */ 1290 void qemu_savevm_state_complete_postcopy(QEMUFile *f) 1291 { 1292 SaveStateEntry *se; 1293 int ret; 1294 1295 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1296 if (!se->ops || !se->ops->save_live_complete_postcopy) { 1297 continue; 1298 } 1299 if (se->ops->is_active) { 1300 if (!se->ops->is_active(se->opaque)) { 1301 continue; 1302 } 1303 } 1304 trace_savevm_section_start(se->idstr, se->section_id); 1305 /* Section type */ 1306 qemu_put_byte(f, QEMU_VM_SECTION_END); 1307 qemu_put_be32(f, se->section_id); 1308 1309 ret = se->ops->save_live_complete_postcopy(f, se->opaque); 1310 trace_savevm_section_end(se->idstr, se->section_id, ret); 1311 save_section_footer(f, se); 1312 if (ret < 0) { 1313 qemu_file_set_error(f, ret); 1314 return; 1315 } 1316 } 1317 1318 qemu_put_byte(f, QEMU_VM_EOF); 1319 qemu_fflush(f); 1320 } 1321 1322 static 1323 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) 1324 { 1325 SaveStateEntry *se; 1326 int ret; 1327 1328 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1329 if (!se->ops || 1330 (in_postcopy && se->ops->has_postcopy && 1331 se->ops->has_postcopy(se->opaque)) || 1332 !se->ops->save_live_complete_precopy) { 1333 continue; 1334 } 1335 1336 if (se->ops->is_active) { 1337 if (!se->ops->is_active(se->opaque)) { 1338 continue; 1339 } 1340 } 1341 trace_savevm_section_start(se->idstr, se->section_id); 1342 1343 save_section_header(f, se, QEMU_VM_SECTION_END); 1344 1345 ret = se->ops->save_live_complete_precopy(f, se->opaque); 1346 trace_savevm_section_end(se->idstr, se->section_id, ret); 1347 save_section_footer(f, se); 1348 if (ret < 0) { 1349 qemu_file_set_error(f, ret); 1350 return -1; 1351 } 1352 } 1353 1354 return 0; 1355 } 1356 1357 static 1358 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, 1359 bool in_postcopy, 1360 bool inactivate_disks) 1361 { 1362 g_autoptr(JSONWriter) vmdesc = NULL; 1363 int vmdesc_len; 1364 SaveStateEntry *se; 1365 int ret; 1366 1367 vmdesc = json_writer_new(false); 1368 json_writer_start_object(vmdesc, NULL); 1369 json_writer_int64(vmdesc, "page_size", qemu_target_page_size()); 1370 json_writer_start_array(vmdesc, "devices"); 1371 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1372 1373 if ((!se->ops || !se->ops->save_state) && !se->vmsd) { 1374 continue; 1375 } 1376 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { 1377 trace_savevm_section_skip(se->idstr, se->section_id); 1378 continue; 1379 } 1380 1381 trace_savevm_section_start(se->idstr, se->section_id); 1382 1383 json_writer_start_object(vmdesc, NULL); 1384 json_writer_str(vmdesc, "name", se->idstr); 1385 json_writer_int64(vmdesc, "instance_id", se->instance_id); 1386 1387 save_section_header(f, se, QEMU_VM_SECTION_FULL); 1388 ret = vmstate_save(f, se, vmdesc); 1389 if (ret) { 1390 qemu_file_set_error(f, ret); 1391 return ret; 1392 } 1393 trace_savevm_section_end(se->idstr, se->section_id, 0); 1394 save_section_footer(f, se); 1395 1396 json_writer_end_object(vmdesc); 1397 } 1398 1399 if (inactivate_disks) { 1400 /* Inactivate before sending QEMU_VM_EOF so that the 1401 * bdrv_invalidate_cache_all() on the other end won't fail. */ 1402 ret = bdrv_inactivate_all(); 1403 if (ret) { 1404 error_report("%s: bdrv_inactivate_all() failed (%d)", 1405 __func__, ret); 1406 qemu_file_set_error(f, ret); 1407 return ret; 1408 } 1409 } 1410 if (!in_postcopy) { 1411 /* Postcopy stream will still be going */ 1412 qemu_put_byte(f, QEMU_VM_EOF); 1413 } 1414 1415 json_writer_end_array(vmdesc); 1416 json_writer_end_object(vmdesc); 1417 vmdesc_len = strlen(json_writer_get(vmdesc)); 1418 1419 if (should_send_vmdesc()) { 1420 qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); 1421 qemu_put_be32(f, vmdesc_len); 1422 qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len); 1423 } 1424 1425 return 0; 1426 } 1427 1428 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, 1429 bool inactivate_disks) 1430 { 1431 int ret; 1432 Error *local_err = NULL; 1433 bool in_postcopy = migration_in_postcopy(); 1434 1435 if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { 1436 error_report_err(local_err); 1437 } 1438 1439 trace_savevm_state_complete_precopy(); 1440 1441 cpu_synchronize_all_states(); 1442 1443 if (!in_postcopy || iterable_only) { 1444 ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy); 1445 if (ret) { 1446 return ret; 1447 } 1448 } 1449 1450 if (iterable_only) { 1451 goto flush; 1452 } 1453 1454 ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy, 1455 inactivate_disks); 1456 if (ret) { 1457 return ret; 1458 } 1459 1460 flush: 1461 qemu_fflush(f); 1462 return 0; 1463 } 1464 1465 /* Give an estimate of the amount left to be transferred, 1466 * the result is split into the amount for units that can and 1467 * for units that can't do postcopy. 1468 */ 1469 void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size, 1470 uint64_t *res_precopy_only, 1471 uint64_t *res_compatible, 1472 uint64_t *res_postcopy_only) 1473 { 1474 SaveStateEntry *se; 1475 1476 *res_precopy_only = 0; 1477 *res_compatible = 0; 1478 *res_postcopy_only = 0; 1479 1480 1481 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1482 if (!se->ops || !se->ops->save_live_pending) { 1483 continue; 1484 } 1485 if (se->ops->is_active) { 1486 if (!se->ops->is_active(se->opaque)) { 1487 continue; 1488 } 1489 } 1490 se->ops->save_live_pending(f, se->opaque, threshold_size, 1491 res_precopy_only, res_compatible, 1492 res_postcopy_only); 1493 } 1494 } 1495 1496 void qemu_savevm_state_cleanup(void) 1497 { 1498 SaveStateEntry *se; 1499 Error *local_err = NULL; 1500 1501 if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) { 1502 error_report_err(local_err); 1503 } 1504 1505 trace_savevm_state_cleanup(); 1506 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1507 if (se->ops && se->ops->save_cleanup) { 1508 se->ops->save_cleanup(se->opaque); 1509 } 1510 } 1511 } 1512 1513 static int qemu_savevm_state(QEMUFile *f, Error **errp) 1514 { 1515 int ret; 1516 MigrationState *ms = migrate_get_current(); 1517 MigrationStatus status; 1518 1519 if (migration_is_running(ms->state)) { 1520 error_setg(errp, QERR_MIGRATION_ACTIVE); 1521 return -EINVAL; 1522 } 1523 1524 if (migrate_use_block()) { 1525 error_setg(errp, "Block migration and snapshots are incompatible"); 1526 return -EINVAL; 1527 } 1528 1529 migrate_init(ms); 1530 memset(&ram_counters, 0, sizeof(ram_counters)); 1531 ms->to_dst_file = f; 1532 1533 qemu_mutex_unlock_iothread(); 1534 qemu_savevm_state_header(f); 1535 qemu_savevm_state_setup(f); 1536 qemu_mutex_lock_iothread(); 1537 1538 while (qemu_file_get_error(f) == 0) { 1539 if (qemu_savevm_state_iterate(f, false) > 0) { 1540 break; 1541 } 1542 } 1543 1544 ret = qemu_file_get_error(f); 1545 if (ret == 0) { 1546 qemu_savevm_state_complete_precopy(f, false, false); 1547 ret = qemu_file_get_error(f); 1548 } 1549 qemu_savevm_state_cleanup(); 1550 if (ret != 0) { 1551 error_setg_errno(errp, -ret, "Error while writing VM state"); 1552 } 1553 1554 if (ret != 0) { 1555 status = MIGRATION_STATUS_FAILED; 1556 } else { 1557 status = MIGRATION_STATUS_COMPLETED; 1558 } 1559 migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status); 1560 1561 /* f is outer parameter, it should not stay in global migration state after 1562 * this function finished */ 1563 ms->to_dst_file = NULL; 1564 1565 return ret; 1566 } 1567 1568 void qemu_savevm_live_state(QEMUFile *f) 1569 { 1570 /* save QEMU_VM_SECTION_END section */ 1571 qemu_savevm_state_complete_precopy(f, true, false); 1572 qemu_put_byte(f, QEMU_VM_EOF); 1573 } 1574 1575 int qemu_save_device_state(QEMUFile *f) 1576 { 1577 SaveStateEntry *se; 1578 1579 if (!migration_in_colo_state()) { 1580 qemu_put_be32(f, QEMU_VM_FILE_MAGIC); 1581 qemu_put_be32(f, QEMU_VM_FILE_VERSION); 1582 } 1583 cpu_synchronize_all_states(); 1584 1585 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1586 int ret; 1587 1588 if (se->is_ram) { 1589 continue; 1590 } 1591 if ((!se->ops || !se->ops->save_state) && !se->vmsd) { 1592 continue; 1593 } 1594 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { 1595 continue; 1596 } 1597 1598 save_section_header(f, se, QEMU_VM_SECTION_FULL); 1599 1600 ret = vmstate_save(f, se, NULL); 1601 if (ret) { 1602 return ret; 1603 } 1604 1605 save_section_footer(f, se); 1606 } 1607 1608 qemu_put_byte(f, QEMU_VM_EOF); 1609 1610 return qemu_file_get_error(f); 1611 } 1612 1613 static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) 1614 { 1615 SaveStateEntry *se; 1616 1617 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 1618 if (!strcmp(se->idstr, idstr) && 1619 (instance_id == se->instance_id || 1620 instance_id == se->alias_id)) 1621 return se; 1622 /* Migrating from an older version? */ 1623 if (strstr(se->idstr, idstr) && se->compat) { 1624 if (!strcmp(se->compat->idstr, idstr) && 1625 (instance_id == se->compat->instance_id || 1626 instance_id == se->alias_id)) 1627 return se; 1628 } 1629 } 1630 return NULL; 1631 } 1632 1633 enum LoadVMExitCodes { 1634 /* Allow a command to quit all layers of nested loadvm loops */ 1635 LOADVM_QUIT = 1, 1636 }; 1637 1638 /* ------ incoming postcopy messages ------ */ 1639 /* 'advise' arrives before any transfers just to tell us that a postcopy 1640 * *might* happen - it might be skipped if precopy transferred everything 1641 * quickly. 1642 */ 1643 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, 1644 uint16_t len) 1645 { 1646 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); 1647 uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; 1648 Error *local_err = NULL; 1649 1650 trace_loadvm_postcopy_handle_advise(); 1651 if (ps != POSTCOPY_INCOMING_NONE) { 1652 error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps); 1653 return -1; 1654 } 1655 1656 switch (len) { 1657 case 0: 1658 if (migrate_postcopy_ram()) { 1659 error_report("RAM postcopy is enabled but have 0 byte advise"); 1660 return -EINVAL; 1661 } 1662 return 0; 1663 case 8 + 8: 1664 if (!migrate_postcopy_ram()) { 1665 error_report("RAM postcopy is disabled but have 16 byte advise"); 1666 return -EINVAL; 1667 } 1668 break; 1669 default: 1670 error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len); 1671 return -EINVAL; 1672 } 1673 1674 if (!postcopy_ram_supported_by_host(mis)) { 1675 postcopy_state_set(POSTCOPY_INCOMING_NONE); 1676 return -1; 1677 } 1678 1679 remote_pagesize_summary = qemu_get_be64(mis->from_src_file); 1680 local_pagesize_summary = ram_pagesize_summary(); 1681 1682 if (remote_pagesize_summary != local_pagesize_summary) { 1683 /* 1684 * This detects two potential causes of mismatch: 1685 * a) A mismatch in host page sizes 1686 * Some combinations of mismatch are probably possible but it gets 1687 * a bit more complicated. In particular we need to place whole 1688 * host pages on the dest at once, and we need to ensure that we 1689 * handle dirtying to make sure we never end up sending part of 1690 * a hostpage on it's own. 1691 * b) The use of different huge page sizes on source/destination 1692 * a more fine grain test is performed during RAM block migration 1693 * but this test here causes a nice early clear failure, and 1694 * also fails when passed to an older qemu that doesn't 1695 * do huge pages. 1696 */ 1697 error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64 1698 " d=%" PRIx64 ")", 1699 remote_pagesize_summary, local_pagesize_summary); 1700 return -1; 1701 } 1702 1703 remote_tps = qemu_get_be64(mis->from_src_file); 1704 if (remote_tps != qemu_target_page_size()) { 1705 /* 1706 * Again, some differences could be dealt with, but for now keep it 1707 * simple. 1708 */ 1709 error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", 1710 (int)remote_tps, qemu_target_page_size()); 1711 return -1; 1712 } 1713 1714 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) { 1715 error_report_err(local_err); 1716 return -1; 1717 } 1718 1719 if (ram_postcopy_incoming_init(mis)) { 1720 return -1; 1721 } 1722 1723 return 0; 1724 } 1725 1726 /* After postcopy we will be told to throw some pages away since they're 1727 * dirty and will have to be demand fetched. Must happen before CPU is 1728 * started. 1729 * There can be 0..many of these messages, each encoding multiple pages. 1730 */ 1731 static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis, 1732 uint16_t len) 1733 { 1734 int tmp; 1735 char ramid[256]; 1736 PostcopyState ps = postcopy_state_get(); 1737 1738 trace_loadvm_postcopy_ram_handle_discard(); 1739 1740 switch (ps) { 1741 case POSTCOPY_INCOMING_ADVISE: 1742 /* 1st discard */ 1743 tmp = postcopy_ram_prepare_discard(mis); 1744 if (tmp) { 1745 return tmp; 1746 } 1747 break; 1748 1749 case POSTCOPY_INCOMING_DISCARD: 1750 /* Expected state */ 1751 break; 1752 1753 default: 1754 error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)", 1755 ps); 1756 return -1; 1757 } 1758 /* We're expecting a 1759 * Version (0) 1760 * a RAM ID string (length byte, name, 0 term) 1761 * then at least 1 16 byte chunk 1762 */ 1763 if (len < (1 + 1 + 1 + 1 + 2 * 8)) { 1764 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); 1765 return -1; 1766 } 1767 1768 tmp = qemu_get_byte(mis->from_src_file); 1769 if (tmp != postcopy_ram_discard_version) { 1770 error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp); 1771 return -1; 1772 } 1773 1774 if (!qemu_get_counted_string(mis->from_src_file, ramid)) { 1775 error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID"); 1776 return -1; 1777 } 1778 tmp = qemu_get_byte(mis->from_src_file); 1779 if (tmp != 0) { 1780 error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp); 1781 return -1; 1782 } 1783 1784 len -= 3 + strlen(ramid); 1785 if (len % 16) { 1786 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len); 1787 return -1; 1788 } 1789 trace_loadvm_postcopy_ram_handle_discard_header(ramid, len); 1790 while (len) { 1791 uint64_t start_addr, block_length; 1792 start_addr = qemu_get_be64(mis->from_src_file); 1793 block_length = qemu_get_be64(mis->from_src_file); 1794 1795 len -= 16; 1796 int ret = ram_discard_range(ramid, start_addr, block_length); 1797 if (ret) { 1798 return ret; 1799 } 1800 } 1801 trace_loadvm_postcopy_ram_handle_discard_end(); 1802 1803 return 0; 1804 } 1805 1806 /* 1807 * Triggered by a postcopy_listen command; this thread takes over reading 1808 * the input stream, leaving the main thread free to carry on loading the rest 1809 * of the device state (from RAM). 1810 * (TODO:This could do with being in a postcopy file - but there again it's 1811 * just another input loop, not that postcopy specific) 1812 */ 1813 static void *postcopy_ram_listen_thread(void *opaque) 1814 { 1815 MigrationIncomingState *mis = migration_incoming_get_current(); 1816 QEMUFile *f = mis->from_src_file; 1817 int load_res; 1818 MigrationState *migr = migrate_get_current(); 1819 1820 object_ref(OBJECT(migr)); 1821 1822 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 1823 MIGRATION_STATUS_POSTCOPY_ACTIVE); 1824 qemu_sem_post(&mis->listen_thread_sem); 1825 trace_postcopy_ram_listen_thread_start(); 1826 1827 rcu_register_thread(); 1828 /* 1829 * Because we're a thread and not a coroutine we can't yield 1830 * in qemu_file, and thus we must be blocking now. 1831 */ 1832 qemu_file_set_blocking(f, true); 1833 load_res = qemu_loadvm_state_main(f, mis); 1834 1835 /* 1836 * This is tricky, but, mis->from_src_file can change after it 1837 * returns, when postcopy recovery happened. In the future, we may 1838 * want a wrapper for the QEMUFile handle. 1839 */ 1840 f = mis->from_src_file; 1841 1842 /* And non-blocking again so we don't block in any cleanup */ 1843 qemu_file_set_blocking(f, false); 1844 1845 trace_postcopy_ram_listen_thread_exit(); 1846 if (load_res < 0) { 1847 qemu_file_set_error(f, load_res); 1848 dirty_bitmap_mig_cancel_incoming(); 1849 if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING && 1850 !migrate_postcopy_ram() && migrate_dirty_bitmaps()) 1851 { 1852 error_report("%s: loadvm failed during postcopy: %d. All states " 1853 "are migrated except dirty bitmaps. Some dirty " 1854 "bitmaps may be lost, and present migrated dirty " 1855 "bitmaps are correctly migrated and valid.", 1856 __func__, load_res); 1857 load_res = 0; /* prevent further exit() */ 1858 } else { 1859 error_report("%s: loadvm failed: %d", __func__, load_res); 1860 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 1861 MIGRATION_STATUS_FAILED); 1862 } 1863 } 1864 if (load_res >= 0) { 1865 /* 1866 * This looks good, but it's possible that the device loading in the 1867 * main thread hasn't finished yet, and so we might not be in 'RUN' 1868 * state yet; wait for the end of the main thread. 1869 */ 1870 qemu_event_wait(&mis->main_thread_load_event); 1871 } 1872 postcopy_ram_incoming_cleanup(mis); 1873 1874 if (load_res < 0) { 1875 /* 1876 * If something went wrong then we have a bad state so exit; 1877 * depending how far we got it might be possible at this point 1878 * to leave the guest running and fire MCEs for pages that never 1879 * arrived as a desperate recovery step. 1880 */ 1881 rcu_unregister_thread(); 1882 exit(EXIT_FAILURE); 1883 } 1884 1885 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 1886 MIGRATION_STATUS_COMPLETED); 1887 /* 1888 * If everything has worked fine, then the main thread has waited 1889 * for us to start, and we're the last use of the mis. 1890 * (If something broke then qemu will have to exit anyway since it's 1891 * got a bad migration state). 1892 */ 1893 migration_incoming_state_destroy(); 1894 qemu_loadvm_state_cleanup(); 1895 1896 rcu_unregister_thread(); 1897 mis->have_listen_thread = false; 1898 postcopy_state_set(POSTCOPY_INCOMING_END); 1899 1900 object_unref(OBJECT(migr)); 1901 1902 return NULL; 1903 } 1904 1905 /* After this message we must be able to immediately receive postcopy data */ 1906 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) 1907 { 1908 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); 1909 trace_loadvm_postcopy_handle_listen(); 1910 Error *local_err = NULL; 1911 1912 if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { 1913 error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); 1914 return -1; 1915 } 1916 if (ps == POSTCOPY_INCOMING_ADVISE) { 1917 /* 1918 * A rare case, we entered listen without having to do any discards, 1919 * so do the setup that's normally done at the time of the 1st discard. 1920 */ 1921 if (migrate_postcopy_ram()) { 1922 postcopy_ram_prepare_discard(mis); 1923 } 1924 } 1925 1926 /* 1927 * Sensitise RAM - can now generate requests for blocks that don't exist 1928 * However, at this point the CPU shouldn't be running, and the IO 1929 * shouldn't be doing anything yet so don't actually expect requests 1930 */ 1931 if (migrate_postcopy_ram()) { 1932 if (postcopy_ram_incoming_setup(mis)) { 1933 postcopy_ram_incoming_cleanup(mis); 1934 return -1; 1935 } 1936 } 1937 1938 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { 1939 error_report_err(local_err); 1940 return -1; 1941 } 1942 1943 mis->have_listen_thread = true; 1944 /* Start up the listening thread and wait for it to signal ready */ 1945 qemu_sem_init(&mis->listen_thread_sem, 0); 1946 qemu_thread_create(&mis->listen_thread, "postcopy/listen", 1947 postcopy_ram_listen_thread, NULL, 1948 QEMU_THREAD_DETACHED); 1949 qemu_sem_wait(&mis->listen_thread_sem); 1950 qemu_sem_destroy(&mis->listen_thread_sem); 1951 1952 return 0; 1953 } 1954 1955 static void loadvm_postcopy_handle_run_bh(void *opaque) 1956 { 1957 Error *local_err = NULL; 1958 MigrationIncomingState *mis = opaque; 1959 1960 /* TODO we should move all of this lot into postcopy_ram.c or a shared code 1961 * in migration.c 1962 */ 1963 cpu_synchronize_all_post_init(); 1964 1965 qemu_announce_self(&mis->announce_timer, migrate_announce_params()); 1966 1967 /* Make sure all file formats flush their mutable metadata. 1968 * If we get an error here, just don't restart the VM yet. */ 1969 bdrv_invalidate_cache_all(&local_err); 1970 if (local_err) { 1971 error_report_err(local_err); 1972 local_err = NULL; 1973 autostart = false; 1974 } 1975 1976 trace_loadvm_postcopy_handle_run_cpu_sync(); 1977 1978 trace_loadvm_postcopy_handle_run_vmstart(); 1979 1980 dirty_bitmap_mig_before_vm_start(); 1981 1982 if (autostart) { 1983 /* Hold onto your hats, starting the CPU */ 1984 vm_start(); 1985 } else { 1986 /* leave it paused and let management decide when to start the CPU */ 1987 runstate_set(RUN_STATE_PAUSED); 1988 } 1989 1990 qemu_bh_delete(mis->bh); 1991 } 1992 1993 /* After all discards we can start running and asking for pages */ 1994 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) 1995 { 1996 PostcopyState ps = postcopy_state_get(); 1997 1998 trace_loadvm_postcopy_handle_run(); 1999 if (ps != POSTCOPY_INCOMING_LISTENING) { 2000 error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); 2001 return -1; 2002 } 2003 2004 postcopy_state_set(POSTCOPY_INCOMING_RUNNING); 2005 mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis); 2006 qemu_bh_schedule(mis->bh); 2007 2008 /* We need to finish reading the stream from the package 2009 * and also stop reading anything more from the stream that loaded the 2010 * package (since it's now being read by the listener thread). 2011 * LOADVM_QUIT will quit all the layers of nested loadvm loops. 2012 */ 2013 return LOADVM_QUIT; 2014 } 2015 2016 /* We must be with page_request_mutex held */ 2017 static gboolean postcopy_sync_page_req(gpointer key, gpointer value, 2018 gpointer data) 2019 { 2020 MigrationIncomingState *mis = data; 2021 void *host_addr = (void *) key; 2022 ram_addr_t rb_offset; 2023 RAMBlock *rb; 2024 int ret; 2025 2026 rb = qemu_ram_block_from_host(host_addr, true, &rb_offset); 2027 if (!rb) { 2028 /* 2029 * This should _never_ happen. However be nice for a migrating VM to 2030 * not crash/assert. Post an error (note: intended to not use *_once 2031 * because we do want to see all the illegal addresses; and this can 2032 * never be triggered by the guest so we're safe) and move on next. 2033 */ 2034 error_report("%s: illegal host addr %p", __func__, host_addr); 2035 /* Try the next entry */ 2036 return FALSE; 2037 } 2038 2039 ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset); 2040 if (ret) { 2041 /* Please refer to above comment. */ 2042 error_report("%s: send rp message failed for addr %p", 2043 __func__, host_addr); 2044 return FALSE; 2045 } 2046 2047 trace_postcopy_page_req_sync(host_addr); 2048 2049 return FALSE; 2050 } 2051 2052 static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis) 2053 { 2054 WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { 2055 g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis); 2056 } 2057 } 2058 2059 static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) 2060 { 2061 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { 2062 error_report("%s: illegal resume received", __func__); 2063 /* Don't fail the load, only for this. */ 2064 return 0; 2065 } 2066 2067 /* 2068 * Reset the last_rb before we resend any page req to source again, since 2069 * the source should have it reset already. 2070 */ 2071 mis->last_rb = NULL; 2072 2073 /* 2074 * This means source VM is ready to resume the postcopy migration. 2075 */ 2076 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2077 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2078 2079 trace_loadvm_postcopy_handle_resume(); 2080 2081 /* Tell source that "we are ready" */ 2082 migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE); 2083 2084 /* 2085 * After a postcopy recovery, the source should have lost the postcopy 2086 * queue, or potentially the requested pages could have been lost during 2087 * the network down phase. Let's re-sync with the source VM by re-sending 2088 * all the pending pages that we eagerly need, so these threads won't get 2089 * blocked too long due to the recovery. 2090 * 2091 * Without this procedure, the faulted destination VM threads (waiting for 2092 * page requests right before the postcopy is interrupted) can keep hanging 2093 * until the pages are sent by the source during the background copying of 2094 * pages, or another thread faulted on the same address accidentally. 2095 */ 2096 migrate_send_rp_req_pages_pending(mis); 2097 2098 /* 2099 * It's time to switch state and release the fault thread to continue 2100 * service page faults. Note that this should be explicitly after the 2101 * above call to migrate_send_rp_req_pages_pending(). In short: 2102 * migrate_send_rp_message_req_pages() is not thread safe, yet. 2103 */ 2104 qemu_sem_post(&mis->postcopy_pause_sem_fault); 2105 2106 return 0; 2107 } 2108 2109 /** 2110 * Immediately following this command is a blob of data containing an embedded 2111 * chunk of migration stream; read it and load it. 2112 * 2113 * @mis: Incoming state 2114 * @length: Length of packaged data to read 2115 * 2116 * Returns: Negative values on error 2117 * 2118 */ 2119 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis) 2120 { 2121 int ret; 2122 size_t length; 2123 QIOChannelBuffer *bioc; 2124 2125 length = qemu_get_be32(mis->from_src_file); 2126 trace_loadvm_handle_cmd_packaged(length); 2127 2128 if (length > MAX_VM_CMD_PACKAGED_SIZE) { 2129 error_report("Unreasonably large packaged state: %zu", length); 2130 return -1; 2131 } 2132 2133 bioc = qio_channel_buffer_new(length); 2134 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer"); 2135 ret = qemu_get_buffer(mis->from_src_file, 2136 bioc->data, 2137 length); 2138 if (ret != length) { 2139 object_unref(OBJECT(bioc)); 2140 error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu", 2141 ret, length); 2142 return (ret < 0) ? ret : -EAGAIN; 2143 } 2144 bioc->usage += length; 2145 trace_loadvm_handle_cmd_packaged_received(ret); 2146 2147 QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); 2148 2149 ret = qemu_loadvm_state_main(packf, mis); 2150 trace_loadvm_handle_cmd_packaged_main(ret); 2151 qemu_fclose(packf); 2152 object_unref(OBJECT(bioc)); 2153 2154 return ret; 2155 } 2156 2157 /* 2158 * Handle request that source requests for recved_bitmap on 2159 * destination. Payload format: 2160 * 2161 * len (1 byte) + ramblock_name (<255 bytes) 2162 */ 2163 static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis, 2164 uint16_t len) 2165 { 2166 QEMUFile *file = mis->from_src_file; 2167 RAMBlock *rb; 2168 char block_name[256]; 2169 size_t cnt; 2170 2171 cnt = qemu_get_counted_string(file, block_name); 2172 if (!cnt) { 2173 error_report("%s: failed to read block name", __func__); 2174 return -EINVAL; 2175 } 2176 2177 /* Validate before using the data */ 2178 if (qemu_file_get_error(file)) { 2179 return qemu_file_get_error(file); 2180 } 2181 2182 if (len != cnt + 1) { 2183 error_report("%s: invalid payload length (%d)", __func__, len); 2184 return -EINVAL; 2185 } 2186 2187 rb = qemu_ram_block_by_name(block_name); 2188 if (!rb) { 2189 error_report("%s: block '%s' not found", __func__, block_name); 2190 return -EINVAL; 2191 } 2192 2193 migrate_send_rp_recv_bitmap(mis, block_name); 2194 2195 trace_loadvm_handle_recv_bitmap(block_name); 2196 2197 return 0; 2198 } 2199 2200 static int loadvm_process_enable_colo(MigrationIncomingState *mis) 2201 { 2202 int ret = migration_incoming_enable_colo(); 2203 2204 if (!ret) { 2205 ret = colo_init_ram_cache(); 2206 if (ret) { 2207 migration_incoming_disable_colo(); 2208 } 2209 } 2210 return ret; 2211 } 2212 2213 /* 2214 * Process an incoming 'QEMU_VM_COMMAND' 2215 * 0 just a normal return 2216 * LOADVM_QUIT All good, but exit the loop 2217 * <0 Error 2218 */ 2219 static int loadvm_process_command(QEMUFile *f) 2220 { 2221 MigrationIncomingState *mis = migration_incoming_get_current(); 2222 uint16_t cmd; 2223 uint16_t len; 2224 uint32_t tmp32; 2225 2226 cmd = qemu_get_be16(f); 2227 len = qemu_get_be16(f); 2228 2229 /* Check validity before continue processing of cmds */ 2230 if (qemu_file_get_error(f)) { 2231 return qemu_file_get_error(f); 2232 } 2233 2234 trace_loadvm_process_command(cmd, len); 2235 if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) { 2236 error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len); 2237 return -EINVAL; 2238 } 2239 2240 if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) { 2241 error_report("%s received with bad length - expecting %zu, got %d", 2242 mig_cmd_args[cmd].name, 2243 (size_t)mig_cmd_args[cmd].len, len); 2244 return -ERANGE; 2245 } 2246 2247 switch (cmd) { 2248 case MIG_CMD_OPEN_RETURN_PATH: 2249 if (mis->to_src_file) { 2250 error_report("CMD_OPEN_RETURN_PATH called when RP already open"); 2251 /* Not really a problem, so don't give up */ 2252 return 0; 2253 } 2254 mis->to_src_file = qemu_file_get_return_path(f); 2255 if (!mis->to_src_file) { 2256 error_report("CMD_OPEN_RETURN_PATH failed"); 2257 return -1; 2258 } 2259 break; 2260 2261 case MIG_CMD_PING: 2262 tmp32 = qemu_get_be32(f); 2263 trace_loadvm_process_command_ping(tmp32); 2264 if (!mis->to_src_file) { 2265 error_report("CMD_PING (0x%x) received with no return path", 2266 tmp32); 2267 return -1; 2268 } 2269 migrate_send_rp_pong(mis, tmp32); 2270 break; 2271 2272 case MIG_CMD_PACKAGED: 2273 return loadvm_handle_cmd_packaged(mis); 2274 2275 case MIG_CMD_POSTCOPY_ADVISE: 2276 return loadvm_postcopy_handle_advise(mis, len); 2277 2278 case MIG_CMD_POSTCOPY_LISTEN: 2279 return loadvm_postcopy_handle_listen(mis); 2280 2281 case MIG_CMD_POSTCOPY_RUN: 2282 return loadvm_postcopy_handle_run(mis); 2283 2284 case MIG_CMD_POSTCOPY_RAM_DISCARD: 2285 return loadvm_postcopy_ram_handle_discard(mis, len); 2286 2287 case MIG_CMD_POSTCOPY_RESUME: 2288 return loadvm_postcopy_handle_resume(mis); 2289 2290 case MIG_CMD_RECV_BITMAP: 2291 return loadvm_handle_recv_bitmap(mis, len); 2292 2293 case MIG_CMD_ENABLE_COLO: 2294 return loadvm_process_enable_colo(mis); 2295 } 2296 2297 return 0; 2298 } 2299 2300 /* 2301 * Read a footer off the wire and check that it matches the expected section 2302 * 2303 * Returns: true if the footer was good 2304 * false if there is a problem (and calls error_report to say why) 2305 */ 2306 static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) 2307 { 2308 int ret; 2309 uint8_t read_mark; 2310 uint32_t read_section_id; 2311 2312 if (!migrate_get_current()->send_section_footer) { 2313 /* No footer to check */ 2314 return true; 2315 } 2316 2317 read_mark = qemu_get_byte(f); 2318 2319 ret = qemu_file_get_error(f); 2320 if (ret) { 2321 error_report("%s: Read section footer failed: %d", 2322 __func__, ret); 2323 return false; 2324 } 2325 2326 if (read_mark != QEMU_VM_SECTION_FOOTER) { 2327 error_report("Missing section footer for %s", se->idstr); 2328 return false; 2329 } 2330 2331 read_section_id = qemu_get_be32(f); 2332 if (read_section_id != se->load_section_id) { 2333 error_report("Mismatched section id in footer for %s -" 2334 " read 0x%x expected 0x%x", 2335 se->idstr, read_section_id, se->load_section_id); 2336 return false; 2337 } 2338 2339 /* All good */ 2340 return true; 2341 } 2342 2343 static int 2344 qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) 2345 { 2346 uint32_t instance_id, version_id, section_id; 2347 SaveStateEntry *se; 2348 char idstr[256]; 2349 int ret; 2350 2351 /* Read section start */ 2352 section_id = qemu_get_be32(f); 2353 if (!qemu_get_counted_string(f, idstr)) { 2354 error_report("Unable to read ID string for section %u", 2355 section_id); 2356 return -EINVAL; 2357 } 2358 instance_id = qemu_get_be32(f); 2359 version_id = qemu_get_be32(f); 2360 2361 ret = qemu_file_get_error(f); 2362 if (ret) { 2363 error_report("%s: Failed to read instance/version ID: %d", 2364 __func__, ret); 2365 return ret; 2366 } 2367 2368 trace_qemu_loadvm_state_section_startfull(section_id, idstr, 2369 instance_id, version_id); 2370 /* Find savevm section */ 2371 se = find_se(idstr, instance_id); 2372 if (se == NULL) { 2373 error_report("Unknown savevm section or instance '%s' %"PRIu32". " 2374 "Make sure that your current VM setup matches your " 2375 "saved VM setup, including any hotplugged devices", 2376 idstr, instance_id); 2377 return -EINVAL; 2378 } 2379 2380 /* Validate version */ 2381 if (version_id > se->version_id) { 2382 error_report("savevm: unsupported version %d for '%s' v%d", 2383 version_id, idstr, se->version_id); 2384 return -EINVAL; 2385 } 2386 se->load_version_id = version_id; 2387 se->load_section_id = section_id; 2388 2389 /* Validate if it is a device's state */ 2390 if (xen_enabled() && se->is_ram) { 2391 error_report("loadvm: %s RAM loading not allowed on Xen", idstr); 2392 return -EINVAL; 2393 } 2394 2395 ret = vmstate_load(f, se); 2396 if (ret < 0) { 2397 error_report("error while loading state for instance 0x%"PRIx32" of" 2398 " device '%s'", instance_id, idstr); 2399 return ret; 2400 } 2401 if (!check_section_footer(f, se)) { 2402 return -EINVAL; 2403 } 2404 2405 return 0; 2406 } 2407 2408 static int 2409 qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis) 2410 { 2411 uint32_t section_id; 2412 SaveStateEntry *se; 2413 int ret; 2414 2415 section_id = qemu_get_be32(f); 2416 2417 ret = qemu_file_get_error(f); 2418 if (ret) { 2419 error_report("%s: Failed to read section ID: %d", 2420 __func__, ret); 2421 return ret; 2422 } 2423 2424 trace_qemu_loadvm_state_section_partend(section_id); 2425 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 2426 if (se->load_section_id == section_id) { 2427 break; 2428 } 2429 } 2430 if (se == NULL) { 2431 error_report("Unknown savevm section %d", section_id); 2432 return -EINVAL; 2433 } 2434 2435 ret = vmstate_load(f, se); 2436 if (ret < 0) { 2437 error_report("error while loading state section id %d(%s)", 2438 section_id, se->idstr); 2439 return ret; 2440 } 2441 if (!check_section_footer(f, se)) { 2442 return -EINVAL; 2443 } 2444 2445 return 0; 2446 } 2447 2448 static int qemu_loadvm_state_header(QEMUFile *f) 2449 { 2450 unsigned int v; 2451 int ret; 2452 2453 v = qemu_get_be32(f); 2454 if (v != QEMU_VM_FILE_MAGIC) { 2455 error_report("Not a migration stream"); 2456 return -EINVAL; 2457 } 2458 2459 v = qemu_get_be32(f); 2460 if (v == QEMU_VM_FILE_VERSION_COMPAT) { 2461 error_report("SaveVM v2 format is obsolete and don't work anymore"); 2462 return -ENOTSUP; 2463 } 2464 if (v != QEMU_VM_FILE_VERSION) { 2465 error_report("Unsupported migration stream version"); 2466 return -ENOTSUP; 2467 } 2468 2469 if (migrate_get_current()->send_configuration) { 2470 if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { 2471 error_report("Configuration section missing"); 2472 qemu_loadvm_state_cleanup(); 2473 return -EINVAL; 2474 } 2475 ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); 2476 2477 if (ret) { 2478 qemu_loadvm_state_cleanup(); 2479 return ret; 2480 } 2481 } 2482 return 0; 2483 } 2484 2485 static int qemu_loadvm_state_setup(QEMUFile *f) 2486 { 2487 SaveStateEntry *se; 2488 int ret; 2489 2490 trace_loadvm_state_setup(); 2491 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 2492 if (!se->ops || !se->ops->load_setup) { 2493 continue; 2494 } 2495 if (se->ops->is_active) { 2496 if (!se->ops->is_active(se->opaque)) { 2497 continue; 2498 } 2499 } 2500 2501 ret = se->ops->load_setup(f, se->opaque); 2502 if (ret < 0) { 2503 qemu_file_set_error(f, ret); 2504 error_report("Load state of device %s failed", se->idstr); 2505 return ret; 2506 } 2507 } 2508 return 0; 2509 } 2510 2511 void qemu_loadvm_state_cleanup(void) 2512 { 2513 SaveStateEntry *se; 2514 2515 trace_loadvm_state_cleanup(); 2516 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { 2517 if (se->ops && se->ops->load_cleanup) { 2518 se->ops->load_cleanup(se->opaque); 2519 } 2520 } 2521 } 2522 2523 /* Return true if we should continue the migration, or false. */ 2524 static bool postcopy_pause_incoming(MigrationIncomingState *mis) 2525 { 2526 trace_postcopy_pause_incoming(); 2527 2528 assert(migrate_postcopy_ram()); 2529 2530 /* Clear the triggered bit to allow one recovery */ 2531 mis->postcopy_recover_triggered = false; 2532 2533 assert(mis->from_src_file); 2534 qemu_file_shutdown(mis->from_src_file); 2535 qemu_fclose(mis->from_src_file); 2536 mis->from_src_file = NULL; 2537 2538 assert(mis->to_src_file); 2539 qemu_file_shutdown(mis->to_src_file); 2540 qemu_mutex_lock(&mis->rp_mutex); 2541 qemu_fclose(mis->to_src_file); 2542 mis->to_src_file = NULL; 2543 qemu_mutex_unlock(&mis->rp_mutex); 2544 2545 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2546 MIGRATION_STATUS_POSTCOPY_PAUSED); 2547 2548 /* Notify the fault thread for the invalidated file handle */ 2549 postcopy_fault_thread_notify(mis); 2550 2551 error_report("Detected IO failure for postcopy. " 2552 "Migration paused."); 2553 2554 while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 2555 qemu_sem_wait(&mis->postcopy_pause_sem_dst); 2556 } 2557 2558 trace_postcopy_pause_incoming_continued(); 2559 2560 return true; 2561 } 2562 2563 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) 2564 { 2565 uint8_t section_type; 2566 int ret = 0; 2567 2568 retry: 2569 while (true) { 2570 section_type = qemu_get_byte(f); 2571 2572 if (qemu_file_get_error(f)) { 2573 ret = qemu_file_get_error(f); 2574 break; 2575 } 2576 2577 trace_qemu_loadvm_state_section(section_type); 2578 switch (section_type) { 2579 case QEMU_VM_SECTION_START: 2580 case QEMU_VM_SECTION_FULL: 2581 ret = qemu_loadvm_section_start_full(f, mis); 2582 if (ret < 0) { 2583 goto out; 2584 } 2585 break; 2586 case QEMU_VM_SECTION_PART: 2587 case QEMU_VM_SECTION_END: 2588 ret = qemu_loadvm_section_part_end(f, mis); 2589 if (ret < 0) { 2590 goto out; 2591 } 2592 break; 2593 case QEMU_VM_COMMAND: 2594 ret = loadvm_process_command(f); 2595 trace_qemu_loadvm_state_section_command(ret); 2596 if ((ret < 0) || (ret == LOADVM_QUIT)) { 2597 goto out; 2598 } 2599 break; 2600 case QEMU_VM_EOF: 2601 /* This is the end of migration */ 2602 goto out; 2603 default: 2604 error_report("Unknown savevm section type %d", section_type); 2605 ret = -EINVAL; 2606 goto out; 2607 } 2608 } 2609 2610 out: 2611 if (ret < 0) { 2612 qemu_file_set_error(f, ret); 2613 2614 /* Cancel bitmaps incoming regardless of recovery */ 2615 dirty_bitmap_mig_cancel_incoming(); 2616 2617 /* 2618 * If we are during an active postcopy, then we pause instead 2619 * of bail out to at least keep the VM's dirty data. Note 2620 * that POSTCOPY_INCOMING_LISTENING stage is still not enough, 2621 * during which we're still receiving device states and we 2622 * still haven't yet started the VM on destination. 2623 * 2624 * Only RAM postcopy supports recovery. Still, if RAM postcopy is 2625 * enabled, canceled bitmaps postcopy will not affect RAM postcopy 2626 * recovering. 2627 */ 2628 if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING && 2629 migrate_postcopy_ram() && postcopy_pause_incoming(mis)) { 2630 /* Reset f to point to the newly created channel */ 2631 f = mis->from_src_file; 2632 goto retry; 2633 } 2634 } 2635 return ret; 2636 } 2637 2638 int qemu_loadvm_state(QEMUFile *f) 2639 { 2640 MigrationIncomingState *mis = migration_incoming_get_current(); 2641 Error *local_err = NULL; 2642 int ret; 2643 2644 if (qemu_savevm_state_blocked(&local_err)) { 2645 error_report_err(local_err); 2646 return -EINVAL; 2647 } 2648 2649 ret = qemu_loadvm_state_header(f); 2650 if (ret) { 2651 return ret; 2652 } 2653 2654 if (qemu_loadvm_state_setup(f) != 0) { 2655 return -EINVAL; 2656 } 2657 2658 cpu_synchronize_all_pre_loadvm(); 2659 2660 ret = qemu_loadvm_state_main(f, mis); 2661 qemu_event_set(&mis->main_thread_load_event); 2662 2663 trace_qemu_loadvm_state_post_main(ret); 2664 2665 if (mis->have_listen_thread) { 2666 /* Listen thread still going, can't clean up yet */ 2667 return ret; 2668 } 2669 2670 if (ret == 0) { 2671 ret = qemu_file_get_error(f); 2672 } 2673 2674 /* 2675 * Try to read in the VMDESC section as well, so that dumping tools that 2676 * intercept our migration stream have the chance to see it. 2677 */ 2678 2679 /* We've got to be careful; if we don't read the data and just shut the fd 2680 * then the sender can error if we close while it's still sending. 2681 * We also mustn't read data that isn't there; some transports (RDMA) 2682 * will stall waiting for that data when the source has already closed. 2683 */ 2684 if (ret == 0 && should_send_vmdesc()) { 2685 uint8_t *buf; 2686 uint32_t size; 2687 uint8_t section_type = qemu_get_byte(f); 2688 2689 if (section_type != QEMU_VM_VMDESCRIPTION) { 2690 error_report("Expected vmdescription section, but got %d", 2691 section_type); 2692 /* 2693 * It doesn't seem worth failing at this point since 2694 * we apparently have an otherwise valid VM state 2695 */ 2696 } else { 2697 buf = g_malloc(0x1000); 2698 size = qemu_get_be32(f); 2699 2700 while (size > 0) { 2701 uint32_t read_chunk = MIN(size, 0x1000); 2702 qemu_get_buffer(f, buf, read_chunk); 2703 size -= read_chunk; 2704 } 2705 g_free(buf); 2706 } 2707 } 2708 2709 qemu_loadvm_state_cleanup(); 2710 cpu_synchronize_all_post_init(); 2711 2712 return ret; 2713 } 2714 2715 int qemu_load_device_state(QEMUFile *f) 2716 { 2717 MigrationIncomingState *mis = migration_incoming_get_current(); 2718 int ret; 2719 2720 /* Load QEMU_VM_SECTION_FULL section */ 2721 ret = qemu_loadvm_state_main(f, mis); 2722 if (ret < 0) { 2723 error_report("Failed to load device state: %d", ret); 2724 return ret; 2725 } 2726 2727 cpu_synchronize_all_post_init(); 2728 return 0; 2729 } 2730 2731 int save_snapshot(const char *name, Error **errp) 2732 { 2733 BlockDriverState *bs, *bs1; 2734 QEMUSnapshotInfo sn1, *sn = &sn1; 2735 int ret = -1, ret2; 2736 QEMUFile *f; 2737 int saved_vm_running; 2738 uint64_t vm_state_size; 2739 qemu_timeval tv; 2740 struct tm tm; 2741 AioContext *aio_context; 2742 2743 if (migration_is_blocked(errp)) { 2744 return ret; 2745 } 2746 2747 if (!replay_can_snapshot()) { 2748 error_setg(errp, "Record/replay does not allow making snapshot " 2749 "right now. Try once more later."); 2750 return ret; 2751 } 2752 2753 if (!bdrv_all_can_snapshot(&bs)) { 2754 error_setg(errp, "Device '%s' is writable but does not support " 2755 "snapshots", bdrv_get_device_or_node_name(bs)); 2756 return ret; 2757 } 2758 2759 /* Delete old snapshots of the same name */ 2760 if (name) { 2761 ret = bdrv_all_delete_snapshot(name, &bs1, errp); 2762 if (ret < 0) { 2763 error_prepend(errp, "Error while deleting snapshot on device " 2764 "'%s': ", bdrv_get_device_or_node_name(bs1)); 2765 return ret; 2766 } 2767 } 2768 2769 bs = bdrv_all_find_vmstate_bs(); 2770 if (bs == NULL) { 2771 error_setg(errp, "No block device can accept snapshots"); 2772 return ret; 2773 } 2774 aio_context = bdrv_get_aio_context(bs); 2775 2776 saved_vm_running = runstate_is_running(); 2777 2778 ret = global_state_store(); 2779 if (ret) { 2780 error_setg(errp, "Error saving global state"); 2781 return ret; 2782 } 2783 vm_stop(RUN_STATE_SAVE_VM); 2784 2785 bdrv_drain_all_begin(); 2786 2787 aio_context_acquire(aio_context); 2788 2789 memset(sn, 0, sizeof(*sn)); 2790 2791 /* fill auxiliary fields */ 2792 qemu_gettimeofday(&tv); 2793 sn->date_sec = tv.tv_sec; 2794 sn->date_nsec = tv.tv_usec * 1000; 2795 sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); 2796 if (replay_mode != REPLAY_MODE_NONE) { 2797 sn->icount = replay_get_current_icount(); 2798 } else { 2799 sn->icount = -1ULL; 2800 } 2801 2802 if (name) { 2803 pstrcpy(sn->name, sizeof(sn->name), name); 2804 } else { 2805 /* cast below needed for OpenBSD where tv_sec is still 'long' */ 2806 localtime_r((const time_t *)&tv.tv_sec, &tm); 2807 strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); 2808 } 2809 2810 /* save the VM state */ 2811 f = qemu_fopen_bdrv(bs, 1); 2812 if (!f) { 2813 error_setg(errp, "Could not open VM state file"); 2814 goto the_end; 2815 } 2816 ret = qemu_savevm_state(f, errp); 2817 vm_state_size = qemu_ftell(f); 2818 ret2 = qemu_fclose(f); 2819 if (ret < 0) { 2820 goto the_end; 2821 } 2822 if (ret2 < 0) { 2823 ret = ret2; 2824 goto the_end; 2825 } 2826 2827 /* The bdrv_all_create_snapshot() call that follows acquires the AioContext 2828 * for itself. BDRV_POLL_WHILE() does not support nested locking because 2829 * it only releases the lock once. Therefore synchronous I/O will deadlock 2830 * unless we release the AioContext before bdrv_all_create_snapshot(). 2831 */ 2832 aio_context_release(aio_context); 2833 aio_context = NULL; 2834 2835 ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); 2836 if (ret < 0) { 2837 error_setg(errp, "Error while creating snapshot on '%s'", 2838 bdrv_get_device_or_node_name(bs)); 2839 bdrv_all_delete_snapshot(sn->name, &bs, NULL); 2840 goto the_end; 2841 } 2842 2843 ret = 0; 2844 2845 the_end: 2846 if (aio_context) { 2847 aio_context_release(aio_context); 2848 } 2849 2850 bdrv_drain_all_end(); 2851 2852 if (saved_vm_running) { 2853 vm_start(); 2854 } 2855 return ret; 2856 } 2857 2858 void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live, 2859 Error **errp) 2860 { 2861 QEMUFile *f; 2862 QIOChannelFile *ioc; 2863 int saved_vm_running; 2864 int ret; 2865 2866 if (!has_live) { 2867 /* live default to true so old version of Xen tool stack can have a 2868 * successful live migration */ 2869 live = true; 2870 } 2871 2872 saved_vm_running = runstate_is_running(); 2873 vm_stop(RUN_STATE_SAVE_VM); 2874 global_state_store_running(); 2875 2876 ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC, 2877 0660, errp); 2878 if (!ioc) { 2879 goto the_end; 2880 } 2881 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state"); 2882 f = qemu_fopen_channel_output(QIO_CHANNEL(ioc)); 2883 object_unref(OBJECT(ioc)); 2884 ret = qemu_save_device_state(f); 2885 if (ret < 0 || qemu_fclose(f) < 0) { 2886 error_setg(errp, QERR_IO_ERROR); 2887 } else { 2888 /* libxl calls the QMP command "stop" before calling 2889 * "xen-save-devices-state" and in case of migration failure, libxl 2890 * would call "cont". 2891 * So call bdrv_inactivate_all (release locks) here to let the other 2892 * side of the migration take control of the images. 2893 */ 2894 if (live && !saved_vm_running) { 2895 ret = bdrv_inactivate_all(); 2896 if (ret) { 2897 error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)", 2898 __func__, ret); 2899 } 2900 } 2901 } 2902 2903 the_end: 2904 if (saved_vm_running) { 2905 vm_start(); 2906 } 2907 } 2908 2909 void qmp_xen_load_devices_state(const char *filename, Error **errp) 2910 { 2911 QEMUFile *f; 2912 QIOChannelFile *ioc; 2913 int ret; 2914 2915 /* Guest must be paused before loading the device state; the RAM state 2916 * will already have been loaded by xc 2917 */ 2918 if (runstate_is_running()) { 2919 error_setg(errp, "Cannot update device state while vm is running"); 2920 return; 2921 } 2922 vm_stop(RUN_STATE_RESTORE_VM); 2923 2924 ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp); 2925 if (!ioc) { 2926 return; 2927 } 2928 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state"); 2929 f = qemu_fopen_channel_input(QIO_CHANNEL(ioc)); 2930 object_unref(OBJECT(ioc)); 2931 2932 ret = qemu_loadvm_state(f); 2933 qemu_fclose(f); 2934 if (ret < 0) { 2935 error_setg(errp, QERR_IO_ERROR); 2936 } 2937 migration_incoming_state_destroy(); 2938 } 2939 2940 int load_snapshot(const char *name, Error **errp) 2941 { 2942 BlockDriverState *bs, *bs_vm_state; 2943 QEMUSnapshotInfo sn; 2944 QEMUFile *f; 2945 int ret; 2946 AioContext *aio_context; 2947 MigrationIncomingState *mis = migration_incoming_get_current(); 2948 2949 if (!bdrv_all_can_snapshot(&bs)) { 2950 error_setg(errp, 2951 "Device '%s' is writable but does not support snapshots", 2952 bdrv_get_device_or_node_name(bs)); 2953 return -ENOTSUP; 2954 } 2955 ret = bdrv_all_find_snapshot(name, &bs); 2956 if (ret < 0) { 2957 error_setg(errp, 2958 "Device '%s' does not have the requested snapshot '%s'", 2959 bdrv_get_device_or_node_name(bs), name); 2960 return ret; 2961 } 2962 2963 bs_vm_state = bdrv_all_find_vmstate_bs(); 2964 if (!bs_vm_state) { 2965 error_setg(errp, "No block device supports snapshots"); 2966 return -ENOTSUP; 2967 } 2968 aio_context = bdrv_get_aio_context(bs_vm_state); 2969 2970 /* Don't even try to load empty VM states */ 2971 aio_context_acquire(aio_context); 2972 ret = bdrv_snapshot_find(bs_vm_state, &sn, name); 2973 aio_context_release(aio_context); 2974 if (ret < 0) { 2975 return ret; 2976 } else if (sn.vm_state_size == 0) { 2977 error_setg(errp, "This is a disk-only snapshot. Revert to it " 2978 " offline using qemu-img"); 2979 return -EINVAL; 2980 } 2981 2982 /* 2983 * Flush the record/replay queue. Now the VM state is going 2984 * to change. Therefore we don't need to preserve its consistency 2985 */ 2986 replay_flush_events(); 2987 2988 /* Flush all IO requests so they don't interfere with the new state. */ 2989 bdrv_drain_all_begin(); 2990 2991 ret = bdrv_all_goto_snapshot(name, &bs, errp); 2992 if (ret < 0) { 2993 error_prepend(errp, "Could not load snapshot '%s' on '%s': ", 2994 name, bdrv_get_device_or_node_name(bs)); 2995 goto err_drain; 2996 } 2997 2998 /* restore the VM state */ 2999 f = qemu_fopen_bdrv(bs_vm_state, 0); 3000 if (!f) { 3001 error_setg(errp, "Could not open VM state file"); 3002 ret = -EINVAL; 3003 goto err_drain; 3004 } 3005 3006 qemu_system_reset(SHUTDOWN_CAUSE_NONE); 3007 mis->from_src_file = f; 3008 3009 aio_context_acquire(aio_context); 3010 ret = qemu_loadvm_state(f); 3011 migration_incoming_state_destroy(); 3012 aio_context_release(aio_context); 3013 3014 bdrv_drain_all_end(); 3015 3016 if (ret < 0) { 3017 error_setg(errp, "Error %d while loading VM state", ret); 3018 return ret; 3019 } 3020 3021 return 0; 3022 3023 err_drain: 3024 bdrv_drain_all_end(); 3025 return ret; 3026 } 3027 3028 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) 3029 { 3030 qemu_ram_set_idstr(mr->ram_block, 3031 memory_region_name(mr), dev); 3032 qemu_ram_set_migratable(mr->ram_block); 3033 } 3034 3035 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) 3036 { 3037 qemu_ram_unset_idstr(mr->ram_block); 3038 qemu_ram_unset_migratable(mr->ram_block); 3039 } 3040 3041 void vmstate_register_ram_global(MemoryRegion *mr) 3042 { 3043 vmstate_register_ram(mr, NULL); 3044 } 3045 3046 bool vmstate_check_only_migratable(const VMStateDescription *vmsd) 3047 { 3048 /* check needed if --only-migratable is specified */ 3049 if (!only_migratable) { 3050 return true; 3051 } 3052 3053 return !(vmsd && vmsd->unmigratable); 3054 } 3055