1 /* 2 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 3 * based on the vhost-user-test.c that is: 4 * Copyright (c) 2014 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 13 #include "chardev/char.h" 14 #include "crypto/tlscredspsk.h" 15 #include "libqtest.h" 16 #include "migration/bootfile.h" 17 #include "migration/framework.h" 18 #include "migration/migration-qmp.h" 19 #include "migration/migration-util.h" 20 #include "ppc-util.h" 21 #include "qapi/error.h" 22 #include "qobject/qjson.h" 23 #include "qobject/qlist.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 #define QEMU_VM_FILE_MAGIC 0x5145564d 31 #define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC" 32 #define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST" 33 #define MULTIFD_TEST_CHANNELS 4 34 35 unsigned start_address; 36 unsigned end_address; 37 static QTestMigrationState src_state; 38 static QTestMigrationState dst_state; 39 static char *tmpfs; 40 41 /* 42 * An initial 3 MB offset is used as that corresponds 43 * to ~1 sec of data transfer with our bandwidth setting. 44 */ 45 #define MAGIC_OFFSET_BASE (3 * 1024 * 1024) 46 /* 47 * A further 1k is added to ensure we're not a multiple 48 * of TEST_MEM_PAGE_SIZE, thus avoid clash with writes 49 * from the migration guest workload. 50 */ 51 #define MAGIC_OFFSET_SHUFFLE 1024 52 #define MAGIC_OFFSET (MAGIC_OFFSET_BASE + MAGIC_OFFSET_SHUFFLE) 53 #define MAGIC_MARKER 0xFEED12345678CAFEULL 54 55 56 /* 57 * Wait for some output in the serial output file, 58 * we get an 'A' followed by an endless string of 'B's 59 * but on the destination we won't have the A (unless we enabled suspend/resume) 60 */ 61 void wait_for_serial(const char *side) 62 { 63 g_autofree char *serialpath = g_strdup_printf("%s/%s", tmpfs, side); 64 FILE *serialfile = fopen(serialpath, "r"); 65 66 do { 67 int readvalue = fgetc(serialfile); 68 69 switch (readvalue) { 70 case 'A': 71 /* Fine */ 72 break; 73 74 case 'B': 75 /* It's alive! */ 76 fclose(serialfile); 77 return; 78 79 case EOF: 80 fseek(serialfile, 0, SEEK_SET); 81 usleep(1000); 82 break; 83 84 default: 85 fprintf(stderr, "Unexpected %d on %s serial\n", readvalue, side); 86 g_assert_not_reached(); 87 } 88 } while (true); 89 } 90 91 void migrate_prepare_for_dirty_mem(QTestState *from) 92 { 93 /* 94 * The guest workflow iterates from start_address to 95 * end_address, writing 1 byte every TEST_MEM_PAGE_SIZE 96 * bytes. 97 * 98 * IOW, if we write to mem at a point which is NOT 99 * a multiple of TEST_MEM_PAGE_SIZE, our write won't 100 * conflict with the migration workflow. 101 * 102 * We put in a marker here, that we'll use to determine 103 * when the data has been transferred to the dst. 104 */ 105 qtest_writeq(from, start_address + MAGIC_OFFSET, MAGIC_MARKER); 106 } 107 108 void migrate_wait_for_dirty_mem(QTestState *from, QTestState *to) 109 { 110 uint64_t watch_address = start_address + MAGIC_OFFSET_BASE; 111 uint64_t marker_address = start_address + MAGIC_OFFSET; 112 uint8_t watch_byte; 113 114 /* 115 * Wait for the MAGIC_MARKER to get transferred, as an 116 * indicator that a migration pass has made some known 117 * amount of progress. 118 */ 119 do { 120 usleep(1000 * 10); 121 } while (qtest_readq(to, marker_address) != MAGIC_MARKER); 122 123 124 /* If suspended, src only iterates once, and watch_byte may never change */ 125 if (src_state.suspend_me) { 126 return; 127 } 128 129 /* 130 * Now ensure that already transferred bytes are 131 * dirty again from the guest workload. Note the 132 * guest byte value will wrap around and by chance 133 * match the original watch_byte. This is harmless 134 * as we'll eventually see a different value if we 135 * keep watching 136 */ 137 watch_byte = qtest_readb(from, watch_address); 138 do { 139 usleep(1000 * 10); 140 } while (qtest_readb(from, watch_address) == watch_byte); 141 } 142 143 static void check_guests_ram(QTestState *who) 144 { 145 /* 146 * Our ASM test will have been incrementing one byte from each page from 147 * start_address to < end_address in order. This gives us a constraint 148 * that any page's byte should be equal or less than the previous pages 149 * byte (mod 256); and they should all be equal except for one transition 150 * at the point where we meet the incrementer. (We're running this with 151 * the guest stopped). 152 */ 153 unsigned address; 154 uint8_t first_byte; 155 uint8_t last_byte; 156 bool hit_edge = false; 157 int bad = 0; 158 159 qtest_memread(who, start_address, &first_byte, 1); 160 last_byte = first_byte; 161 162 for (address = start_address + TEST_MEM_PAGE_SIZE; address < end_address; 163 address += TEST_MEM_PAGE_SIZE) 164 { 165 uint8_t b; 166 qtest_memread(who, address, &b, 1); 167 if (b != last_byte) { 168 if (((b + 1) % 256) == last_byte && !hit_edge) { 169 /* 170 * This is OK, the guest stopped at the point of 171 * incrementing the previous page but didn't get 172 * to us yet. 173 */ 174 hit_edge = true; 175 last_byte = b; 176 } else { 177 bad++; 178 if (bad <= 10) { 179 fprintf(stderr, "Memory content inconsistency at %x" 180 " first_byte = %x last_byte = %x current = %x" 181 " hit_edge = %x\n", 182 address, first_byte, last_byte, b, hit_edge); 183 } 184 } 185 } 186 } 187 if (bad >= 10) { 188 fprintf(stderr, "and in another %d pages", bad - 10); 189 } 190 g_assert(bad == 0); 191 } 192 193 static void cleanup(const char *filename) 194 { 195 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, filename); 196 197 unlink(path); 198 } 199 200 static QList *migrate_start_get_qmp_capabilities(const MigrateStart *args) 201 { 202 QList *capabilities = NULL; 203 204 if (args->oob) { 205 capabilities = qlist_new(); 206 qlist_append_str(capabilities, "oob"); 207 } 208 return capabilities; 209 } 210 211 static void migrate_start_set_capabilities(QTestState *from, QTestState *to, 212 MigrateStart *args) 213 { 214 /* 215 * MigrationCapability_lookup and MIGRATION_CAPABILITY_ constants 216 * are from qapi-types-migration.h. 217 */ 218 for (uint8_t i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 219 if (!args->caps[i]) { 220 continue; 221 } 222 if (from) { 223 migrate_set_capability(from, 224 MigrationCapability_lookup.array[i], true); 225 } 226 if (to) { 227 migrate_set_capability(to, 228 MigrationCapability_lookup.array[i], true); 229 } 230 } 231 232 /* 233 * Always enable migration events. Libvirt always uses it, let's try 234 * to mimic as closer as that. 235 */ 236 migrate_set_capability(from, "events", true); 237 if (!args->defer_target_connect && to) { 238 migrate_set_capability(to, "events", true); 239 } 240 241 /* 242 * Default number of channels should be fine for most 243 * tests. Individual tests can override by calling 244 * migrate_set_parameter() directly. 245 */ 246 if (args->caps[MIGRATION_CAPABILITY_MULTIFD]) { 247 migrate_set_parameter_int(from, "multifd-channels", 248 MULTIFD_TEST_CHANNELS); 249 if (to) { 250 migrate_set_parameter_int(to, "multifd-channels", 251 MULTIFD_TEST_CHANNELS); 252 } 253 } 254 255 return; 256 } 257 258 static char *test_shmem_path(void) 259 { 260 return g_strdup_printf("/dev/shm/qemu-%d", getpid()); 261 } 262 263 #define MIG_MEM_ID "mig.mem" 264 265 /* NOTE: caller is responsbile to free the string if returned */ 266 static char *migrate_mem_type_get_opts(MemType type, const char *memory_size) 267 { 268 g_autofree char *shmem_path = NULL; 269 g_autofree char *backend = NULL; 270 bool share = true; 271 char *opts; 272 273 switch (type) { 274 case MEM_TYPE_ANON: 275 backend = g_strdup("-object memory-backend-ram"); 276 share = false; 277 break; 278 case MEM_TYPE_SHMEM: 279 shmem_path = test_shmem_path(); 280 backend = g_strdup_printf("-object memory-backend-file,mem-path=%s", 281 shmem_path); 282 break; 283 case MEM_TYPE_MEMFD: 284 backend = g_strdup("-object memory-backend-memfd"); 285 break; 286 default: 287 g_assert_not_reached(); 288 break; 289 } 290 291 opts = g_strdup_printf("%s,id=%s,size=%s,share=%s", 292 backend, MIG_MEM_ID, memory_size, 293 share ? "on" : "off"); 294 295 return opts; 296 } 297 298 int migrate_args(char **from, char **to, const char *uri, MigrateStart *args) 299 { 300 /* options for source and target */ 301 g_autofree gchar *arch_opts = NULL; 302 gchar *cmd_source = NULL; 303 gchar *cmd_target = NULL; 304 const gchar *ignore_stderr; 305 g_autofree char *mem_object = NULL; 306 const char *kvm_opts = NULL; 307 const char *arch = qtest_get_arch(); 308 const char *memory_size; 309 const char *machine_alias, *machine_opts = ""; 310 g_autofree char *machine = NULL; 311 const char *bootpath = bootfile_get(); 312 g_autofree char *memory_backend = NULL; 313 const char *events; 314 315 if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { 316 memory_size = "150M"; 317 318 if (g_str_equal(arch, "i386")) { 319 machine_alias = "pc"; 320 } else { 321 machine_alias = "q35"; 322 } 323 arch_opts = g_strdup_printf( 324 "-drive if=none,id=d0,file=%s,format=raw " 325 "-device ide-hd,drive=d0,secs=1,cyls=1,heads=1", bootpath); 326 start_address = X86_TEST_MEM_START; 327 end_address = X86_TEST_MEM_END; 328 } else if (g_str_equal(arch, "s390x")) { 329 memory_size = "128M"; 330 machine_alias = "s390-ccw-virtio"; 331 arch_opts = g_strdup_printf("-bios %s", bootpath); 332 start_address = S390_TEST_MEM_START; 333 end_address = S390_TEST_MEM_END; 334 } else if (strcmp(arch, "ppc64") == 0) { 335 memory_size = "256M"; 336 start_address = PPC_TEST_MEM_START; 337 end_address = PPC_TEST_MEM_END; 338 machine_alias = "pseries"; 339 machine_opts = "vsmt=8"; 340 arch_opts = g_strdup_printf( 341 "-nodefaults -machine " PSERIES_DEFAULT_CAPABILITIES " " 342 "-bios %s", bootpath); 343 } else if (strcmp(arch, "aarch64") == 0) { 344 memory_size = "150M"; 345 machine_alias = "virt"; 346 machine_opts = "gic-version=3"; 347 arch_opts = g_strdup_printf("-cpu max -kernel %s", bootpath); 348 start_address = ARM_TEST_MEM_START; 349 end_address = ARM_TEST_MEM_END; 350 } else { 351 g_assert_not_reached(); 352 } 353 354 if (!getenv("QTEST_LOG") && args->hide_stderr) { 355 #ifndef _WIN32 356 ignore_stderr = "2>/dev/null"; 357 #else 358 /* 359 * On Windows the QEMU executable is created via CreateProcess() and 360 * IO redirection does not work, so don't bother adding IO redirection 361 * to the command line. 362 */ 363 ignore_stderr = ""; 364 #endif 365 } else { 366 ignore_stderr = ""; 367 } 368 369 mem_object = migrate_mem_type_get_opts(args->mem_type, memory_size); 370 memory_backend = g_strdup_printf("-machine memory-backend=%s %s", 371 MIG_MEM_ID, mem_object); 372 373 if (args->use_dirty_ring) { 374 kvm_opts = ",dirty-ring-size=4096"; 375 } 376 377 if (!qtest_has_machine(machine_alias)) { 378 g_autofree char *msg = g_strdup_printf("machine %s not supported", machine_alias); 379 g_test_skip(msg); 380 return -1; 381 } 382 383 machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC, 384 QEMU_ENV_DST); 385 386 g_test_message("Using machine type: %s", machine); 387 388 cmd_source = g_strdup_printf("-accel kvm%s -accel tcg " 389 "-machine %s,%s " 390 "-name source,debug-threads=on " 391 "%s " 392 "-serial file:%s/src_serial " 393 "%s %s %s", 394 kvm_opts ? kvm_opts : "", 395 machine, machine_opts, 396 memory_backend, tmpfs, 397 arch_opts ? arch_opts : "", 398 args->opts_source ? args->opts_source : "", 399 ignore_stderr); 400 401 /* 402 * If the monitor connection is deferred, enable events on the command line 403 * so none are missed. This is for testing only, do not set migration 404 * options like this in general. 405 */ 406 events = args->defer_target_connect ? "-global migration.x-events=on" : ""; 407 408 cmd_target = g_strdup_printf("-accel kvm%s -accel tcg " 409 "-machine %s,%s " 410 "-name target,debug-threads=on " 411 "%s " 412 "-serial file:%s/dest_serial " 413 "-incoming %s " 414 "%s %s %s %s", 415 kvm_opts ? kvm_opts : "", 416 machine, machine_opts, 417 memory_backend, tmpfs, uri, 418 events, 419 arch_opts ? arch_opts : "", 420 args->opts_target ? args->opts_target : "", 421 ignore_stderr); 422 423 *from = cmd_source; 424 *to = cmd_target; 425 return 0; 426 } 427 428 static bool migrate_mem_type_prepare(MemType type) 429 { 430 switch (type) { 431 case MEM_TYPE_SHMEM: 432 if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) { 433 g_test_skip("/dev/shm is not supported"); 434 return false; 435 } 436 break; 437 default: 438 break; 439 } 440 441 return true; 442 } 443 444 static void migrate_mem_type_cleanup(MemType type) 445 { 446 g_autofree char *shmem_path = NULL; 447 448 switch (type) { 449 case MEM_TYPE_SHMEM: 450 451 /* 452 * Remove shmem file immediately to avoid memory leak in test 453 * failed case. It's valid because QEMU has already opened this 454 * file 455 */ 456 shmem_path = test_shmem_path(); 457 unlink(shmem_path); 458 break; 459 default: 460 break; 461 } 462 } 463 464 int migrate_start(QTestState **from, QTestState **to, const char *uri, 465 MigrateStart *args) 466 { 467 g_autofree gchar *cmd_source = NULL; 468 g_autofree gchar *cmd_target = NULL; 469 g_autoptr(QList) capabilities = migrate_start_get_qmp_capabilities(args); 470 471 if (!migrate_mem_type_prepare(args->mem_type)) { 472 return -1; 473 } 474 475 dst_state = (QTestMigrationState) { }; 476 src_state = (QTestMigrationState) { }; 477 bootfile_create(qtest_get_arch(), tmpfs, args->suspend_me); 478 src_state.suspend_me = args->suspend_me; 479 480 if (migrate_args(&cmd_source, &cmd_target, uri, args)) { 481 return -1; 482 } 483 484 if (!args->only_target) { 485 *from = qtest_init_ext(QEMU_ENV_SRC, cmd_source, capabilities, true); 486 qtest_qmp_set_event_callback(*from, 487 migrate_watch_for_events, 488 &src_state); 489 } 490 491 if (!args->only_source) { 492 *to = qtest_init_ext(QEMU_ENV_DST, cmd_target, capabilities, 493 !args->defer_target_connect); 494 qtest_qmp_set_event_callback(*to, 495 migrate_watch_for_events, 496 &dst_state); 497 } 498 499 migrate_mem_type_cleanup(args->mem_type); 500 migrate_start_set_capabilities(*from, 501 args->only_source ? NULL : *to, 502 args); 503 504 return 0; 505 } 506 507 void migrate_end(QTestState *from, QTestState *to, bool test_dest) 508 { 509 unsigned char dest_byte_a, dest_byte_b, dest_byte_c, dest_byte_d; 510 511 qtest_quit(from); 512 513 if (test_dest) { 514 qtest_memread(to, start_address, &dest_byte_a, 1); 515 516 /* Destination still running, wait for a byte to change */ 517 do { 518 qtest_memread(to, start_address, &dest_byte_b, 1); 519 usleep(1000 * 10); 520 } while (dest_byte_a == dest_byte_b); 521 522 qtest_qmp_assert_success(to, "{ 'execute' : 'stop'}"); 523 524 /* With it stopped, check nothing changes */ 525 qtest_memread(to, start_address, &dest_byte_c, 1); 526 usleep(1000 * 200); 527 qtest_memread(to, start_address, &dest_byte_d, 1); 528 g_assert_cmpint(dest_byte_c, ==, dest_byte_d); 529 530 check_guests_ram(to); 531 } 532 533 qtest_quit(to); 534 535 cleanup("migsocket"); 536 cleanup("cpr.sock"); 537 cleanup("src_serial"); 538 cleanup("dest_serial"); 539 cleanup(FILE_TEST_FILENAME); 540 } 541 542 static int migrate_postcopy_prepare(QTestState **from_ptr, 543 QTestState **to_ptr, 544 MigrateCommon *args) 545 { 546 QTestState *from, *to; 547 548 /* set postcopy capabilities */ 549 args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME] = true; 550 args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true; 551 552 if (migrate_start(&from, &to, "defer", &args->start)) { 553 return -1; 554 } 555 556 if (args->start_hook) { 557 args->postcopy_data = args->start_hook(from, to); 558 } 559 560 migrate_ensure_non_converge(from); 561 migrate_prepare_for_dirty_mem(from); 562 qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," 563 " 'arguments': { " 564 " 'channels': [ { 'channel-type': 'main'," 565 " 'addr': { 'transport': 'socket'," 566 " 'type': 'inet'," 567 " 'host': '127.0.0.1'," 568 " 'port': '0' } } ] } }"); 569 570 /* Wait for the first serial output from the source */ 571 wait_for_serial("src_serial"); 572 wait_for_suspend(from, &src_state); 573 574 migrate_qmp(from, to, NULL, NULL, "{}"); 575 576 migrate_wait_for_dirty_mem(from, to); 577 578 *from_ptr = from; 579 *to_ptr = to; 580 581 return 0; 582 } 583 584 static void migrate_postcopy_complete(QTestState *from, QTestState *to, 585 MigrateCommon *args) 586 { 587 MigrationTestEnv *env = migration_get_env(); 588 589 wait_for_migration_complete(from); 590 591 if (args->start.suspend_me) { 592 /* wakeup succeeds only if guest is suspended */ 593 qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}"); 594 } 595 596 /* Make sure we get at least one "B" on destination */ 597 wait_for_serial("dest_serial"); 598 599 if (env->uffd_feature_thread_id) { 600 read_blocktime(to); 601 } 602 603 if (args->end_hook) { 604 args->end_hook(from, to, args->postcopy_data); 605 args->postcopy_data = NULL; 606 } 607 608 migrate_end(from, to, true); 609 } 610 611 void test_postcopy_common(MigrateCommon *args) 612 { 613 QTestState *from, *to; 614 615 if (migrate_postcopy_prepare(&from, &to, args)) { 616 return; 617 } 618 migrate_postcopy_start(from, to, &src_state); 619 migrate_postcopy_complete(from, to, args); 620 } 621 622 static void wait_for_postcopy_status(QTestState *one, const char *status) 623 { 624 wait_for_migration_status(one, status, 625 (const char * []) { 626 "failed", "active", 627 "completed", NULL 628 }); 629 } 630 631 static void postcopy_recover_fail(QTestState *from, QTestState *to, 632 PostcopyRecoveryFailStage stage) 633 { 634 #ifndef _WIN32 635 bool fail_early = (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH); 636 int ret, pair1[2], pair2[2]; 637 char c; 638 639 g_assert(stage > POSTCOPY_FAIL_NONE && stage < POSTCOPY_FAIL_MAX); 640 641 /* Create two unrelated socketpairs */ 642 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1); 643 g_assert_cmpint(ret, ==, 0); 644 645 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair2); 646 g_assert_cmpint(ret, ==, 0); 647 648 /* 649 * Give the guests unpaired ends of the sockets, so they'll all blocked 650 * at reading. This mimics a wrong channel established. 651 */ 652 qtest_qmp_fds_assert_success(from, &pair1[0], 1, 653 "{ 'execute': 'getfd'," 654 " 'arguments': { 'fdname': 'fd-mig' }}"); 655 qtest_qmp_fds_assert_success(to, &pair2[0], 1, 656 "{ 'execute': 'getfd'," 657 " 'arguments': { 'fdname': 'fd-mig' }}"); 658 659 /* 660 * Write the 1st byte as QEMU_VM_COMMAND (0x8) for the dest socket, to 661 * emulate the 1st byte of a real recovery, but stops from there to 662 * keep dest QEMU in RECOVER. This is needed so that we can kick off 663 * the recover process on dest QEMU (by triggering the G_IO_IN event). 664 * 665 * NOTE: this trick is not needed on src QEMUs, because src doesn't 666 * rely on an pre-existing G_IO_IN event, so it will always trigger the 667 * upcoming recovery anyway even if it can read nothing. 668 */ 669 #define QEMU_VM_COMMAND 0x08 670 c = QEMU_VM_COMMAND; 671 ret = send(pair2[1], &c, 1, 0); 672 g_assert_cmpint(ret, ==, 1); 673 674 if (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH) { 675 /* 676 * This will make src QEMU to fail at an early stage when trying to 677 * resume later, where it shouldn't reach RECOVER stage at all. 678 */ 679 close(pair1[1]); 680 } 681 682 migrate_recover(to, "fd:fd-mig"); 683 migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}"); 684 685 /* 686 * Source QEMU has an extra RECOVER_SETUP phase, dest doesn't have it. 687 * Make sure it appears along the way. 688 */ 689 migration_event_wait(from, "postcopy-recover-setup"); 690 691 if (fail_early) { 692 /* 693 * When fails at reconnection, src QEMU will automatically goes 694 * back to PAUSED state. Making sure there is an event in this 695 * case: Libvirt relies on this to detect early reconnection 696 * errors. 697 */ 698 migration_event_wait(from, "postcopy-paused"); 699 } else { 700 /* 701 * We want to test "fail later" at RECOVER stage here. Make sure 702 * both QEMU instances will go into RECOVER stage first, then test 703 * kicking them out using migrate-pause. 704 * 705 * Explicitly check the RECOVER event on src, that's what Libvirt 706 * relies on, rather than polling. 707 */ 708 migration_event_wait(from, "postcopy-recover"); 709 wait_for_postcopy_status(from, "postcopy-recover"); 710 711 /* Need an explicit kick on src QEMU in this case */ 712 migrate_pause(from); 713 } 714 715 /* 716 * For all failure cases, we'll reach such states on both sides now. 717 * Check them. 718 */ 719 wait_for_postcopy_status(from, "postcopy-paused"); 720 wait_for_postcopy_status(to, "postcopy-recover"); 721 722 /* 723 * Kick dest QEMU out too. This is normally not needed in reality 724 * because when the channel is shutdown it should also happen on src. 725 * However here we used separate socket pairs so we need to do that 726 * explicitly. 727 */ 728 migrate_pause(to); 729 wait_for_postcopy_status(to, "postcopy-paused"); 730 731 close(pair1[0]); 732 close(pair2[0]); 733 close(pair2[1]); 734 735 if (stage != POSTCOPY_FAIL_CHANNEL_ESTABLISH) { 736 close(pair1[1]); 737 } 738 #endif 739 } 740 741 void test_postcopy_recovery_common(MigrateCommon *args) 742 { 743 QTestState *from, *to; 744 g_autofree char *uri = NULL; 745 746 /* 747 * Always enable OOB QMP capability for recovery tests, migrate-recover is 748 * executed out-of-band 749 */ 750 args->start.oob = true; 751 752 /* Always hide errors for postcopy recover tests since they're expected */ 753 args->start.hide_stderr = true; 754 755 if (migrate_postcopy_prepare(&from, &to, args)) { 756 return; 757 } 758 759 /* Turn postcopy speed down, 4K/s is slow enough on any machines */ 760 migrate_set_parameter_int(from, "max-postcopy-bandwidth", 4096); 761 762 /* Now we start the postcopy */ 763 migrate_postcopy_start(from, to, &src_state); 764 765 /* 766 * Wait until postcopy is really started; we can only run the 767 * migrate-pause command during a postcopy 768 */ 769 wait_for_migration_status(from, "postcopy-active", NULL); 770 771 /* 772 * Manually stop the postcopy migration. This emulates a network 773 * failure with the migration socket 774 */ 775 migrate_pause(from); 776 777 /* 778 * Wait for destination side to reach postcopy-paused state. The 779 * migrate-recover command can only succeed if destination machine 780 * is in the paused state 781 */ 782 wait_for_postcopy_status(to, "postcopy-paused"); 783 wait_for_postcopy_status(from, "postcopy-paused"); 784 785 if (args->postcopy_recovery_fail_stage) { 786 /* 787 * Test when a wrong socket specified for recover, and then the 788 * ability to kick it out, and continue with a correct socket. 789 */ 790 postcopy_recover_fail(from, to, args->postcopy_recovery_fail_stage); 791 /* continue with a good recovery */ 792 } 793 794 /* 795 * Create a new socket to emulate a new channel that is different 796 * from the broken migration channel; tell the destination to 797 * listen to the new port 798 */ 799 uri = g_strdup_printf("unix:%s/migsocket-recover", tmpfs); 800 migrate_recover(to, uri); 801 802 /* 803 * Try to rebuild the migration channel using the resume flag and 804 * the newly created channel 805 */ 806 migrate_qmp(from, to, uri, NULL, "{'resume': true}"); 807 808 /* Restore the postcopy bandwidth to unlimited */ 809 migrate_set_parameter_int(from, "max-postcopy-bandwidth", 0); 810 811 migrate_postcopy_complete(from, to, args); 812 } 813 814 int test_precopy_common(MigrateCommon *args) 815 { 816 QTestState *from, *to; 817 void *data_hook = NULL; 818 QObject *in_channels = NULL; 819 QObject *out_channels = NULL; 820 821 g_assert(!args->cpr_channel || args->connect_channels); 822 823 if (migrate_start(&from, &to, args->listen_uri, &args->start)) { 824 return -1; 825 } 826 827 if (args->start_hook) { 828 data_hook = args->start_hook(from, to); 829 } 830 831 /* Wait for the first serial output from the source */ 832 if (args->result == MIG_TEST_SUCCEED) { 833 wait_for_serial("src_serial"); 834 wait_for_suspend(from, &src_state); 835 } 836 837 if (args->live) { 838 migrate_ensure_non_converge(from); 839 migrate_prepare_for_dirty_mem(from); 840 } else { 841 /* 842 * Testing non-live migration, we allow it to run at 843 * full speed to ensure short test case duration. 844 * For tests expected to fail, we don't need to 845 * change anything. 846 */ 847 if (args->result == MIG_TEST_SUCCEED) { 848 qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); 849 wait_for_stop(from, &src_state); 850 migrate_ensure_converge(from); 851 } 852 } 853 854 /* 855 * The cpr channel must be included in outgoing channels, but not in 856 * migrate-incoming channels. 857 */ 858 if (args->connect_channels) { 859 if (args->start.defer_target_connect && 860 !strcmp(args->listen_uri, "defer")) { 861 in_channels = qobject_from_json(args->connect_channels, 862 &error_abort); 863 } 864 out_channels = qobject_from_json(args->connect_channels, &error_abort); 865 866 if (args->cpr_channel) { 867 QList *channels_list = qobject_to(QList, out_channels); 868 QObject *obj = migrate_str_to_channel(args->cpr_channel); 869 870 qlist_append(channels_list, obj); 871 } 872 } 873 874 if (args->result == MIG_TEST_QMP_ERROR) { 875 migrate_qmp_fail(from, args->connect_uri, out_channels, "{}"); 876 goto finish; 877 } 878 879 migrate_qmp(from, to, args->connect_uri, out_channels, "{}"); 880 881 if (args->start.defer_target_connect) { 882 qtest_connect(to); 883 qtest_qmp_handshake(to, NULL); 884 if (!strcmp(args->listen_uri, "defer")) { 885 migrate_incoming_qmp(to, args->connect_uri, in_channels, "{}"); 886 } 887 } 888 889 if (args->result != MIG_TEST_SUCCEED) { 890 bool allow_active = args->result == MIG_TEST_FAIL; 891 wait_for_migration_fail(from, allow_active); 892 893 if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { 894 qtest_set_expected_status(to, EXIT_FAILURE); 895 } 896 } else { 897 if (args->live) { 898 /* 899 * For initial iteration(s) we must do a full pass, 900 * but for the final iteration, we need only wait 901 * for some dirty mem before switching to converge 902 */ 903 while (args->iterations > 1) { 904 wait_for_migration_pass(from, &src_state); 905 args->iterations--; 906 } 907 migrate_wait_for_dirty_mem(from, to); 908 909 migrate_ensure_converge(from); 910 911 /* 912 * We do this first, as it has a timeout to stop us 913 * hanging forever if migration didn't converge 914 */ 915 wait_for_migration_complete(from); 916 917 wait_for_stop(from, &src_state); 918 919 } else { 920 wait_for_migration_complete(from); 921 /* 922 * Must wait for dst to finish reading all incoming 923 * data on the socket before issuing 'cont' otherwise 924 * it'll be ignored 925 */ 926 wait_for_migration_complete(to); 927 928 qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}"); 929 } 930 931 wait_for_resume(to, &dst_state); 932 933 if (args->start.suspend_me) { 934 /* wakeup succeeds only if guest is suspended */ 935 qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}"); 936 } 937 938 wait_for_serial("dest_serial"); 939 } 940 941 finish: 942 if (args->end_hook) { 943 args->end_hook(from, to, data_hook); 944 } 945 946 migrate_end(from, to, args->result == MIG_TEST_SUCCEED); 947 948 return 0; 949 } 950 951 static void file_dirty_offset_region(void) 952 { 953 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 954 size_t size = FILE_TEST_OFFSET; 955 g_autofree char *data = g_new0(char, size); 956 957 memset(data, FILE_TEST_MARKER, size); 958 g_assert(g_file_set_contents(path, data, size, NULL)); 959 } 960 961 static void file_check_offset_region(void) 962 { 963 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 964 size_t size = FILE_TEST_OFFSET; 965 g_autofree char *expected = g_new0(char, size); 966 g_autofree char *actual = NULL; 967 uint64_t *stream_start; 968 969 /* 970 * Ensure the skipped offset region's data has not been touched 971 * and the migration stream starts at the right place. 972 */ 973 974 memset(expected, FILE_TEST_MARKER, size); 975 976 g_assert(g_file_get_contents(path, &actual, NULL, NULL)); 977 g_assert(!memcmp(actual, expected, size)); 978 979 stream_start = (uint64_t *)(actual + size); 980 g_assert_cmpint(cpu_to_be64(*stream_start) >> 32, ==, QEMU_VM_FILE_MAGIC); 981 } 982 983 void test_file_common(MigrateCommon *args, bool stop_src) 984 { 985 QTestState *from, *to; 986 void *data_hook = NULL; 987 bool check_offset = false; 988 989 if (migrate_start(&from, &to, args->listen_uri, &args->start)) { 990 return; 991 } 992 993 /* 994 * File migration is never live. We can keep the source VM running 995 * during migration, but the destination will not be running 996 * concurrently. 997 */ 998 g_assert_false(args->live); 999 1000 if (g_strrstr(args->connect_uri, "offset=")) { 1001 check_offset = true; 1002 /* 1003 * This comes before the start_hook because it's equivalent to 1004 * a management application creating the file and writing to 1005 * it so hooks should expect the file to be already present. 1006 */ 1007 file_dirty_offset_region(); 1008 } 1009 1010 if (args->start_hook) { 1011 data_hook = args->start_hook(from, to); 1012 } 1013 1014 migrate_ensure_converge(from); 1015 wait_for_serial("src_serial"); 1016 1017 if (stop_src) { 1018 qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); 1019 wait_for_stop(from, &src_state); 1020 } 1021 1022 if (args->result == MIG_TEST_QMP_ERROR) { 1023 migrate_qmp_fail(from, args->connect_uri, NULL, "{}"); 1024 goto finish; 1025 } 1026 1027 migrate_qmp(from, to, args->connect_uri, NULL, "{}"); 1028 wait_for_migration_complete(from); 1029 1030 /* 1031 * We need to wait for the source to finish before starting the 1032 * destination. 1033 */ 1034 migrate_incoming_qmp(to, args->connect_uri, NULL, "{}"); 1035 wait_for_migration_complete(to); 1036 1037 if (stop_src) { 1038 qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}"); 1039 } 1040 wait_for_resume(to, &dst_state); 1041 1042 wait_for_serial("dest_serial"); 1043 1044 if (check_offset) { 1045 file_check_offset_region(); 1046 } 1047 1048 finish: 1049 if (args->end_hook) { 1050 args->end_hook(from, to, data_hook); 1051 } 1052 1053 migrate_end(from, to, args->result == MIG_TEST_SUCCEED); 1054 } 1055 1056 void *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from, 1057 QTestState *to, 1058 const char *method) 1059 { 1060 migrate_set_parameter_str(from, "multifd-compression", method); 1061 migrate_set_parameter_str(to, "multifd-compression", method); 1062 1063 /* Start incoming migration from the 1st socket */ 1064 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 1065 1066 return NULL; 1067 } 1068 1069 QTestMigrationState *get_src(void) 1070 { 1071 return &src_state; 1072 } 1073 1074 QTestMigrationState *get_dst(void) 1075 { 1076 return &dst_state; 1077 } 1078 1079 MigrationTestEnv *migration_get_env(void) 1080 { 1081 static MigrationTestEnv *env; 1082 g_autoptr(GError) err = NULL; 1083 1084 if (env) { 1085 return env; 1086 } 1087 1088 env = g_new0(MigrationTestEnv, 1); 1089 env->qemu_src = getenv(QEMU_ENV_SRC); 1090 env->qemu_dst = getenv(QEMU_ENV_DST); 1091 1092 /* 1093 * The default QTEST_QEMU_BINARY must always be provided because 1094 * that is what helpers use to query the accel type and 1095 * architecture. 1096 */ 1097 if (env->qemu_src && env->qemu_dst) { 1098 g_test_message("Only one of %s, %s is allowed", 1099 QEMU_ENV_SRC, QEMU_ENV_DST); 1100 exit(1); 1101 } 1102 1103 env->has_kvm = qtest_has_accel("kvm"); 1104 env->has_tcg = qtest_has_accel("tcg"); 1105 1106 if (!env->has_tcg && !env->has_kvm) { 1107 g_test_skip("No KVM or TCG accelerator available"); 1108 return env; 1109 } 1110 1111 env->has_dirty_ring = kvm_dirty_ring_supported(); 1112 env->has_uffd = ufd_version_check(&env->uffd_feature_thread_id); 1113 env->arch = qtest_get_arch(); 1114 env->is_x86 = !strcmp(env->arch, "i386") || !strcmp(env->arch, "x86_64"); 1115 1116 env->tmpfs = g_dir_make_tmp("migration-test-XXXXXX", &err); 1117 if (!env->tmpfs) { 1118 g_test_message("Can't create temporary directory in %s: %s", 1119 g_get_tmp_dir(), err->message); 1120 } 1121 g_assert(env->tmpfs); 1122 1123 tmpfs = env->tmpfs; 1124 1125 return env; 1126 } 1127 1128 int migration_env_clean(MigrationTestEnv *env) 1129 { 1130 char *tmpfs; 1131 int ret = 0; 1132 1133 if (!env) { 1134 return ret; 1135 } 1136 1137 bootfile_delete(); 1138 1139 tmpfs = env->tmpfs; 1140 ret = rmdir(tmpfs); 1141 if (ret != 0) { 1142 g_test_message("unable to rmdir: path (%s): %s", 1143 tmpfs, strerror(errno)); 1144 } 1145 g_free(tmpfs); 1146 1147 return ret; 1148 } 1149