1 /*
2 * QTest testcase for precopy migration
3 *
4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates
5 * based on the vhost-user-test.c that is:
6 * Copyright (c) 2014 Virtual Open Systems Sarl.
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 *
11 */
12
13 #include "qemu/osdep.h"
14 #include "chardev/char.h"
15 #include "crypto/tlscredspsk.h"
16 #include "libqtest.h"
17 #include "migration/bootfile.h"
18 #include "migration/framework.h"
19 #include "migration/migration-qmp.h"
20 #include "migration/migration-util.h"
21 #include "ppc-util.h"
22 #include "qobject/qlist.h"
23 #include "qapi-types-migration.h"
24 #include "qemu/module.h"
25 #include "qemu/option.h"
26 #include "qemu/range.h"
27 #include "qemu/sockets.h"
28
29
/*
 * Dirty limit stops working once the dirty page rate error value is
 * less than DIRTYLIMIT_TOLERANCE_RANGE.  The tests below use it as the
 * accepted error when comparing a measured rate with the quota.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */

/*
 * Directory used by the tests in this file for migration sockets, serial
 * output files and boot images.
 * NOTE(review): it is not assigned in the visible part of the file --
 * confirm where it is initialised.
 */
static char *tmpfs;
37
test_precopy_unix_plain(void)38 static void test_precopy_unix_plain(void)
39 {
40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
41 MigrateCommon args = {
42 .listen_uri = uri,
43 .connect_uri = uri,
44 /*
45 * The simplest use case of precopy, covering smoke tests of
46 * get-dirty-log dirty tracking.
47 */
48 .live = true,
49 };
50
51 test_precopy_common(&args);
52 }
53
test_precopy_unix_suspend_live(void)54 static void test_precopy_unix_suspend_live(void)
55 {
56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
57 MigrateCommon args = {
58 .listen_uri = uri,
59 .connect_uri = uri,
60 /*
61 * despite being live, the test is fast because the src
62 * suspends immediately.
63 */
64 .live = true,
65 .start.suspend_me = true,
66 };
67
68 test_precopy_common(&args);
69 }
70
test_precopy_unix_suspend_notlive(void)71 static void test_precopy_unix_suspend_notlive(void)
72 {
73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
74 MigrateCommon args = {
75 .listen_uri = uri,
76 .connect_uri = uri,
77 .start.suspend_me = true,
78 };
79
80 test_precopy_common(&args);
81 }
82
test_precopy_unix_dirty_ring(void)83 static void test_precopy_unix_dirty_ring(void)
84 {
85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
86 MigrateCommon args = {
87 .start = {
88 .use_dirty_ring = true,
89 },
90 .listen_uri = uri,
91 .connect_uri = uri,
92 /*
93 * Besides the precopy/unix basic test, cover dirty ring interface
94 * rather than get-dirty-log.
95 */
96 .live = true,
97 };
98
99 test_precopy_common(&args);
100 }
101
test_precopy_tcp_plain(void)102 static void test_precopy_tcp_plain(void)
103 {
104 MigrateCommon args = {
105 .listen_uri = "tcp:127.0.0.1:0",
106 };
107
108 test_precopy_common(&args);
109 }
110
/*
 * Start hook: enable the "return-path" and "switchover-ack" capabilities
 * on both sides of the migration.
 *
 * Returns NULL (no hook data).
 */
static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to)
{
    static const char *const caps[] = { "return-path", "switchover-ack" };
    size_t i;

    /* Each capability is set on the source first, then on the destination */
    for (i = 0; i < sizeof(caps) / sizeof(caps[0]); i++) {
        migrate_set_capability(from, caps[i], true);
        migrate_set_capability(to, caps[i], true);
    }

    return NULL;
}
122
test_precopy_tcp_switchover_ack(void)123 static void test_precopy_tcp_switchover_ack(void)
124 {
125 MigrateCommon args = {
126 .listen_uri = "tcp:127.0.0.1:0",
127 .start_hook = migrate_hook_start_switchover_ack,
128 /*
129 * Source VM must be running in order to consider the switchover ACK
130 * when deciding to do switchover or not.
131 */
132 .live = true,
133 };
134
135 test_precopy_common(&args);
136 }
137
138 #ifndef _WIN32
/*
 * Start hook: create a connected socket pair and hand one end to each
 * QEMU under the fd name 'fd-mig', starting the incoming side on it.
 *
 * Returns NULL (no hook data).
 */
static void *migrate_hook_start_fd(QTestState *from,
                                   QTestState *to)
{
    int socks[2];
    int rc = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, socks);

    /* The two sockets carry the migration stream */
    g_assert_cmpint(rc, ==, 0);

    /* The destination receives the first socket... */
    qtest_qmp_fds_assert_success(to, &socks[0], 1,
                                 "{ 'execute': 'getfd',"
                                 " 'arguments': { 'fdname': 'fd-mig' }}");
    close(socks[0]);

    /* ...and starts the incoming migration on it */
    migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}");

    /* The source receives the second socket */
    qtest_qmp_fds_assert_success(from, &socks[1], 1,
                                 "{ 'execute': 'getfd',"
                                 " 'arguments': { 'fdname': 'fd-mig' }}");
    close(socks[1]);

    return NULL;
}
166
/*
 * End hook: check that the named fd 'fd-mig' is gone on both sides.
 *
 * The test assumes QEMU removes the named fd from its list, so a
 * 'closefd' request for it is expected to fail with a "not found" error,
 * first on the source and then on the destination.
 */
static void migrate_hook_end_fd(QTestState *from,
                                QTestState *to,
                                void *opaque)
{
    QTestState *sides[] = { from, to };
    size_t i;

    for (i = 0; i < sizeof(sides) / sizeof(sides[0]); i++) {
        const char *desc;
        QDict *reply = qtest_qmp(sides[i],
                                 "{ 'execute': 'closefd',"
                                 " 'arguments': { 'fdname': 'fd-mig' }}");

        g_assert_true(qdict_haskey(reply, "error"));
        desc = qdict_get_str(qdict_get_qdict(reply, "error"), "desc");
        g_assert_cmpstr(desc, ==, "File descriptor named 'fd-mig' not found");
        qobject_unref(reply);
    }
}
195
test_precopy_fd_socket(void)196 static void test_precopy_fd_socket(void)
197 {
198 MigrateCommon args = {
199 .listen_uri = "defer",
200 .connect_uri = "fd:fd-mig",
201 .start_hook = migrate_hook_start_fd,
202 .end_hook = migrate_hook_end_fd,
203 };
204 test_precopy_common(&args);
205 }
206
/*
 * Start hook: open the migration test file twice and pass one descriptor
 * to each QEMU under the fd name 'fd-mig'.
 *
 * Failures to open the file are reported through the GLib test assertions
 * used by the rest of this file instead of a bare assert().
 *
 * Returns NULL (no hook data).
 */
static void *migrate_hook_start_precopy_fd_file(QTestState *from,
                                                QTestState *to)
{
    g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
    /* Both sides open the file with the same flags */
    const int flags = O_CREAT | O_RDWR;
    int fds[2];

    fds[0] = open(file, flags, 0660);
    g_assert_cmpint(fds[0], !=, -1);

    fds[1] = open(file, flags, 0660);
    g_assert_cmpint(fds[1], !=, -1);

    qtest_qmp_fds_assert_success(to, &fds[0], 1,
                                 "{ 'execute': 'getfd',"
                                 " 'arguments': { 'fdname': 'fd-mig' }}");

    qtest_qmp_fds_assert_success(from, &fds[1], 1,
                                 "{ 'execute': 'getfd',"
                                 " 'arguments': { 'fdname': 'fd-mig' }}");

    /* Our copies are closed once both have been passed over QMP */
    close(fds[0]);
    close(fds[1]);

    return NULL;
}
235
test_precopy_fd_file(void)236 static void test_precopy_fd_file(void)
237 {
238 MigrateCommon args = {
239 .listen_uri = "defer",
240 .connect_uri = "fd:fd-mig",
241 .start_hook = migrate_hook_start_precopy_fd_file,
242 .end_hook = migrate_hook_end_fd,
243 };
244 test_file_common(&args, true);
245 }
246 #endif /* _WIN32 */
247
/*
 * The way auto_converge works, we need to do a large number of passes
 * to run this test.  Auto_converge logic is only run once every
 * three iterations, so:
 *
 * - 3 iterations without auto_converge enabled
 * - 3 iterations with pct = 5
 * - 3 iterations with pct = 30
 * - 3 iterations with pct = 55
 * - 3 iterations with pct = 80
 * - 3 iterations with pct = 95 (min(95, 80 + 25))
 *
 * To make things even worse, we need to run the initial stage at
 * 3MB/s so we enter autoconverge even when host is (over)loaded.
 */
test_auto_converge(void)263 static void test_auto_converge(void)
264 {
265 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
266 MigrateStart args = {};
267 QTestState *from, *to;
268 int64_t percentage;
269
270 /*
271 * We want the test to be stable and as fast as possible.
272 * E.g., with 1Gb/s bandwidth migration may pass without throttling,
273 * so we need to decrease a bandwidth.
274 */
275 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;
276 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt;
277 int max_try_count, hit = 0;
278
279 if (migrate_start(&from, &to, uri, &args)) {
280 return;
281 }
282
283 migrate_set_capability(from, "auto-converge", true);
284 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct);
285 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct);
286 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct);
287
288 /*
289 * Set the initial parameters so that the migration could not converge
290 * without throttling.
291 */
292 migrate_ensure_non_converge(from);
293
294 /* To check remaining size after precopy */
295 migrate_set_capability(from, "pause-before-switchover", true);
296
297 /* Wait for the first serial output from the source */
298 wait_for_serial("src_serial");
299
300 migrate_qmp(from, to, uri, NULL, "{}");
301
302 /* Wait for throttling begins */
303 percentage = 0;
304 do {
305 percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
306 if (percentage != 0) {
307 break;
308 }
309 usleep(20);
310 g_assert_false(get_src()->stop_seen);
311 } while (true);
312 /* The first percentage of throttling should be at least init_pct */
313 g_assert_cmpint(percentage, >=, init_pct);
314
315 /*
316 * End the loop when the dirty sync count greater than 1.
317 */
318 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) {
319 usleep(1000 * 1000);
320 }
321
322 prev_dirty_sync_cnt = dirty_sync_cnt;
323
324 /*
325 * The RAMBlock dirty sync count must changes in 5 seconds, here we set
326 * the timeout to 10 seconds to ensure it changes.
327 *
328 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s,
329 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3)
330 * to complete; this ensures that the RAMBlock dirty sync occurs.
331 */
332 max_try_count = 10;
333 while (--max_try_count) {
334 dirty_sync_cnt = get_migration_pass(from);
335 if (dirty_sync_cnt != prev_dirty_sync_cnt) {
336 hit = 1;
337 break;
338 }
339 prev_dirty_sync_cnt = dirty_sync_cnt;
340 sleep(1);
341 }
342 g_assert_cmpint(hit, ==, 1);
343
344 /* Now, when we tested that throttling works, let it converge */
345 migrate_ensure_converge(from);
346
347 /*
348 * Wait for pre-switchover status to check last throttle percentage
349 * and remaining. These values will be zeroed later
350 */
351 wait_for_migration_status(from, "pre-switchover", NULL);
352
353 /* The final percentage of throttling shouldn't be greater than max_pct */
354 percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
355 g_assert_cmpint(percentage, <=, max_pct);
356 migrate_continue(from, "pre-switchover");
357
358 qtest_qmp_eventwait(to, "RESUME");
359
360 wait_for_serial("dest_serial");
361 wait_for_migration_complete(from);
362
363 migrate_end(from, to, true);
364 }
365
366 static void *
migrate_hook_start_precopy_tcp_multifd(QTestState * from,QTestState * to)367 migrate_hook_start_precopy_tcp_multifd(QTestState *from,
368 QTestState *to)
369 {
370 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
371 }
372
373 static void *
migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState * from,QTestState * to)374 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from,
375 QTestState *to)
376 {
377 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
378 migrate_set_parameter_str(from, "zero-page-detection", "legacy");
379 return NULL;
380 }
381
382 static void *
migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState * from,QTestState * to)383 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from,
384 QTestState *to)
385 {
386 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
387 migrate_set_parameter_str(from, "zero-page-detection", "none");
388 return NULL;
389 }
390
test_multifd_tcp_uri_none(void)391 static void test_multifd_tcp_uri_none(void)
392 {
393 MigrateCommon args = {
394 .listen_uri = "defer",
395 .start_hook = migrate_hook_start_precopy_tcp_multifd,
396 /*
397 * Multifd is more complicated than most of the features, it
398 * directly takes guest page buffers when sending, make sure
399 * everything will work alright even if guest page is changing.
400 */
401 .live = true,
402 };
403 test_precopy_common(&args);
404 }
405
test_multifd_tcp_zero_page_legacy(void)406 static void test_multifd_tcp_zero_page_legacy(void)
407 {
408 MigrateCommon args = {
409 .listen_uri = "defer",
410 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy,
411 /*
412 * Multifd is more complicated than most of the features, it
413 * directly takes guest page buffers when sending, make sure
414 * everything will work alright even if guest page is changing.
415 */
416 .live = true,
417 };
418 test_precopy_common(&args);
419 }
420
test_multifd_tcp_no_zero_page(void)421 static void test_multifd_tcp_no_zero_page(void)
422 {
423 MigrateCommon args = {
424 .listen_uri = "defer",
425 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page,
426 /*
427 * Multifd is more complicated than most of the features, it
428 * directly takes guest page buffers when sending, make sure
429 * everything will work alright even if guest page is changing.
430 */
431 .live = true,
432 };
433 test_precopy_common(&args);
434 }
435
test_multifd_tcp_channels_none(void)436 static void test_multifd_tcp_channels_none(void)
437 {
438 MigrateCommon args = {
439 .listen_uri = "defer",
440 .start_hook = migrate_hook_start_precopy_tcp_multifd,
441 .live = true,
442 .connect_channels = ("[ { 'channel-type': 'main',"
443 " 'addr': { 'transport': 'socket',"
444 " 'type': 'inet',"
445 " 'host': '127.0.0.1',"
446 " 'port': '0' } } ]"),
447 };
448 test_precopy_common(&args);
449 }
450
/*
 * This test does:
 *  source               target
 *                       migrate_incoming
 *     migrate
 *     migrate_cancel
 *                       launch another target
 *     migrate
 *
 *  And see that it works
 */
test_multifd_tcp_cancel(void)462 static void test_multifd_tcp_cancel(void)
463 {
464 MigrateStart args = {
465 .hide_stderr = true,
466 };
467 QTestState *from, *to, *to2;
468
469 if (migrate_start(&from, &to, "defer", &args)) {
470 return;
471 }
472
473 migrate_ensure_non_converge(from);
474 migrate_prepare_for_dirty_mem(from);
475
476 migrate_set_parameter_int(from, "multifd-channels", 16);
477 migrate_set_parameter_int(to, "multifd-channels", 16);
478
479 migrate_set_capability(from, "multifd", true);
480 migrate_set_capability(to, "multifd", true);
481
482 /* Start incoming migration from the 1st socket */
483 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");
484
485 /* Wait for the first serial output from the source */
486 wait_for_serial("src_serial");
487
488 migrate_qmp(from, to, NULL, NULL, "{}");
489
490 migrate_wait_for_dirty_mem(from, to);
491
492 migrate_cancel(from);
493
494 /* Make sure QEMU process "to" exited */
495 qtest_set_expected_status(to, EXIT_FAILURE);
496 qtest_wait_qemu(to);
497 qtest_quit(to);
498
499 /*
500 * Ensure the source QEMU finishes its cancellation process before we
501 * proceed with the setup of the next migration. The migrate_start()
502 * function and others might want to interact with the source in a way that
503 * is not possible while the migration is not canceled properly. For
504 * example, setting migration capabilities when the migration is still
505 * running leads to an error.
506 */
507 wait_for_migration_status(from, "cancelled", NULL);
508
509 args = (MigrateStart){
510 .only_target = true,
511 };
512
513 if (migrate_start(&from, &to2, "defer", &args)) {
514 return;
515 }
516
517 migrate_set_parameter_int(to2, "multifd-channels", 16);
518
519 migrate_set_capability(to2, "multifd", true);
520
521 /* Start incoming migration from the 1st socket */
522 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}");
523
524 migrate_ensure_non_converge(from);
525
526 migrate_qmp(from, to2, NULL, NULL, "{}");
527
528 migrate_wait_for_dirty_mem(from, to2);
529
530 migrate_ensure_converge(from);
531
532 wait_for_stop(from, get_src());
533 qtest_qmp_eventwait(to2, "RESUME");
534
535 wait_for_serial("dest_serial");
536 wait_for_migration_complete(from);
537 migrate_end(from, to2, true);
538 }
539
/*
 * Cancel on the source once the @phase event has been seen for a
 * migration that is expected to fail; the source must end up 'failed'.
 */
static void test_cancel_src_after_failed(QTestState *from, QTestState *to,
                                         const char *uri, const char *phase)
{
    const char *unexpected[] = { "completed", NULL };

    /*
     * migrate_incoming_qmp() is deliberately not called, which forces the
     * source into the failed state during migrate_qmp().
     */
    wait_for_serial("src_serial");
    migrate_ensure_converge(from);

    migrate_qmp(from, to, uri, NULL, "{}");

    migration_event_wait(from, phase);
    migrate_cancel(from);

    /* Cancelling does not move the migration out of 'failed' */
    wait_for_migration_status(from, "failed", unexpected);

    /* The destination never started migrating, so it is not waited for */
}
566
/*
 * Cancel again on the source after a first cancel has produced the @phase
 * event; the source must end up 'cancelled' and the destination 'failed'.
 */
static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to,
                                            const char *uri, const char *phase)
{
    const char *unexpected[] = { "completed", NULL };

    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");

    wait_for_serial("src_serial");
    migrate_ensure_converge(from);

    migrate_qmp(from, to, uri, NULL, "{}");

    /* First cancel: moves the source to cancelled/cancelling */
    migrate_cancel(from);
    migration_event_wait(from, phase);

    /* Second cancel: the one under test */
    migrate_cancel(from);

    wait_for_migration_status(from, "cancelled", unexpected);
    wait_for_migration_status(to, "failed", unexpected);
}
590
/*
 * Cancel on the source after the @phase event of a migration that is
 * allowed to finish; both sides must still report completion.
 */
static void test_cancel_src_after_complete(QTestState *from, QTestState *to,
                                           const char *uri, const char *phase)
{
    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");

    wait_for_serial("src_serial");
    migrate_ensure_converge(from);

    migrate_qmp(from, to, uri, NULL, "{}");

    migration_event_wait(from, phase);
    migrate_cancel(from);

    /*
     * qmp_migrate_cancel() returns early when the migration is no longer
     * running, so the status does not change to cancelled.
     */
    wait_for_migration_complete(from);
    wait_for_migration_complete(to);
}
611
/*
 * Issue cancels on both sides while no migration is in progress, then run
 * a migration and check that it completes on both sides.
 */
static void test_cancel_src_after_none(QTestState *from, QTestState *to,
                                       const char *uri, const char *phase)
{
    /*
     * Cancelling when nothing is migrating must not affect subsequent
     * migrations.
     */
    migrate_cancel(to);

    wait_for_serial("src_serial");
    migrate_cancel(from);

    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");

    migrate_ensure_converge(from);
    migrate_qmp(from, to, uri, NULL, "{}");

    wait_for_migration_complete(from);
    wait_for_migration_complete(to);
}
632
/*
 * Cancel on the source once the @phase event has been seen, with
 * pause-before-switchover and multifd enabled on both sides; the source
 * must end up 'cancelled' and the destination 'failed'.
 */
static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to,
                                           const char *uri, const char *phase)
{
    static const char *const caps[] = { "pause-before-switchover", "multifd" };
    const char *unexpected[] = { "completed", NULL };
    size_t i;

    /* Each capability is set on the source first, then on the destination */
    for (i = 0; i < sizeof(caps) / sizeof(caps[0]); i++) {
        migrate_set_capability(from, caps[i], true);
        migrate_set_capability(to, caps[i], true);
    }

    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");

    wait_for_serial("src_serial");
    migrate_ensure_converge(from);

    migrate_qmp(from, to, uri, NULL, "{}");

    migration_event_wait(from, phase);
    migrate_cancel(from);
    migration_event_wait(from, "cancelling");

    wait_for_migration_status(from, "cancelled", unexpected);
    wait_for_migration_status(to, "failed", unexpected);
}
659
test_cancel_src_after_status(void * opaque)660 static void test_cancel_src_after_status(void *opaque)
661 {
662 const char *test_path = opaque;
663 g_autofree char *phase = g_path_get_basename(test_path);
664 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
665 QTestState *from, *to;
666 MigrateStart args = {
667 .hide_stderr = true,
668 };
669
670 if (migrate_start(&from, &to, "defer", &args)) {
671 return;
672 }
673
674 if (g_str_equal(phase, "cancelling") ||
675 g_str_equal(phase, "cancelled")) {
676 test_cancel_src_after_cancelled(from, to, uri, phase);
677
678 } else if (g_str_equal(phase, "completed")) {
679 test_cancel_src_after_complete(from, to, uri, phase);
680
681 } else if (g_str_equal(phase, "failed")) {
682 test_cancel_src_after_failed(from, to, uri, phase);
683
684 } else if (g_str_equal(phase, "none")) {
685 test_cancel_src_after_none(from, to, uri, phase);
686
687 } else {
688 /* any state that comes before pre-switchover */
689 test_cancel_src_pre_switchover(from, to, uri, phase);
690 }
691
692 migrate_end(from, to, false);
693 }
694
/*
 * Issue 'calc-dirty-rate' on @who in 'dirty-ring' mode with @calc_time as
 * the 'calc-time' argument, asserting that the command succeeds.
 */
static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
{
    qtest_qmp_assert_success(
        who,
        "{ 'execute': 'calc-dirty-rate','arguments': { 'calc-time': %" PRIu64
        ",'mode': 'dirty-ring' }}",
        calc_time);
}
704
query_dirty_rate(QTestState * who)705 static QDict *query_dirty_rate(QTestState *who)
706 {
707 return qtest_qmp_assert_success_ref(who,
708 "{ 'execute': 'query-dirty-rate' }");
709 }
710
/*
 * Issue 'set-vcpu-dirty-limit' on @who with @dirtyrate as the 'dirty-rate'
 * argument, asserting that the command succeeds.
 */
static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
{
    qtest_qmp_assert_success(
        who,
        "{ 'execute': 'set-vcpu-dirty-limit','arguments': { 'dirty-rate': %"
        PRIu64 " } }",
        dirtyrate);
}
719
/*
 * Issue 'cancel-vcpu-dirty-limit' on @who, asserting that it succeeds.
 */
static void cancel_vcpu_dirty_limit(QTestState *who)
{
    qtest_qmp_assert_success(
        who, "{ 'execute': 'cancel-vcpu-dirty-limit' }");
}
725
query_vcpu_dirty_limit(QTestState * who)726 static QDict *query_vcpu_dirty_limit(QTestState *who)
727 {
728 QDict *rsp;
729
730 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
731 g_assert(!qdict_haskey(rsp, "error"));
732 g_assert(qdict_haskey(rsp, "return"));
733
734 return rsp;
735 }
736
/*
 * Report whether the dirty rate calculation on @who has left the
 * "measuring" state.
 *
 * Returns true when the status reported by query-dirty-rate is anything
 * other than "measuring".
 */
static bool calc_dirtyrate_ready(QTestState *who)
{
    QDict *reply = query_dirty_rate(who);
    const char *state;
    bool still_measuring;

    g_assert(reply);

    state = qdict_get_str(reply, "status");
    g_assert(state);

    /* Evaluate before the reply, which owns the string, is released */
    still_measuring = g_strcmp0(state, "measuring") == 0;
    qobject_unref(reply);

    return !still_measuring;
}
753
/*
 * Wait for a dirty rate calculation on @who to finish.
 *
 * Sleeps for @time_s seconds (the calculation time), then polls
 * calc_dirtyrate_ready() every 1000us for at most 10000 further polls
 * (about 10s).  The test fails if the measurement is still not complete.
 *
 * The outcome is judged on the readiness flag itself, not on the retry
 * counter.  The counter is post-decremented in the loop condition, so it
 * reads -1 after a timeout and may read 0 after a success on the last
 * poll; asserting on it would let timeouts pass and could fail spuriously.
 */
static void wait_for_calc_dirtyrate_complete(QTestState *who,
                                             int64_t time_s)
{
    int max_try_count = 10000;
    bool ready;

    usleep(time_s * 1000000);

    ready = calc_dirtyrate_ready(who);
    while (!ready && max_try_count-- > 0) {
        usleep(1000);
        ready = calc_dirtyrate_ready(who);
    }

    g_assert_true(ready);
}
770
/*
 * Read the dirty rate of the first entry in the 'vcpu-dirty-rate' list
 * reported by query-dirty-rate on @who.
 *
 * Asserts that the status is "measured" and that the list is non-empty.
 * Returns the entry's "dirty-rate" value, or -1 when that key cannot be
 * read as an integer.
 */
static int64_t get_dirty_rate(QTestState *who)
{
    QDict *reply = query_dirty_rate(who);
    const QListEntry *first;
    const char *state;
    QList *per_vcpu;
    QDict *vcpu0;
    int64_t result;

    g_assert(reply);

    state = qdict_get_str(reply, "status");
    g_assert(state);
    g_assert_cmpstr(state, ==, "measured");

    per_vcpu = qdict_get_qlist(reply, "vcpu-dirty-rate");
    g_assert(per_vcpu && !qlist_empty(per_vcpu));

    first = qlist_first(per_vcpu);
    g_assert(first);

    vcpu0 = qobject_to(QDict, qlist_entry_obj(first));
    g_assert(vcpu0);

    result = qdict_get_try_int(vcpu0, "dirty-rate", -1);

    qobject_unref(reply);
    return result;
}
801
/*
 * Read the limit rate of the first entry in the list returned by
 * query-vcpu-dirty-limit on @who.
 *
 * Asserts that the returned list is non-empty.  Returns the entry's
 * "limit-rate" value, or -1 when that key cannot be read as an integer.
 */
static int64_t get_limit_rate(QTestState *who)
{
    QDict *reply = query_vcpu_dirty_limit(who);
    const QListEntry *first;
    QList *per_vcpu;
    QDict *vcpu0;
    int64_t result;

    g_assert(reply);

    per_vcpu = qdict_get_qlist(reply, "return");
    g_assert(per_vcpu && !qlist_empty(per_vcpu));

    first = qlist_first(per_vcpu);
    g_assert(first);

    vcpu0 = qobject_to(QDict, qlist_entry_obj(first));
    g_assert(vcpu0);

    result = qdict_get_try_int(vcpu0, "limit-rate", -1);

    qobject_unref(reply);
    return result;
}
827
dirtylimit_start_vm(void)828 static QTestState *dirtylimit_start_vm(void)
829 {
830 QTestState *vm = NULL;
831 g_autofree gchar *cmd = NULL;
832 const char *bootpath;
833
834 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false);
835 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
836 "-name dirtylimit-test,debug-threads=on "
837 "-m 150M -smp 1 "
838 "-serial file:%s/vm_serial "
839 "-drive file=%s,format=raw ",
840 tmpfs, bootpath);
841
842 vm = qtest_init(cmd);
843 return vm;
844 }
845
/*
 * Quit @vm and remove its vm_serial file from the test directory.
 */
static void dirtylimit_stop_vm(QTestState *vm)
{
    g_autofree char *serial_path = g_strdup_printf("%s/%s", tmpfs, "vm_serial");

    qtest_quit(vm);
    unlink(serial_path);
}
853
test_vcpu_dirty_limit(void)854 static void test_vcpu_dirty_limit(void)
855 {
856 QTestState *vm;
857 int64_t origin_rate;
858 int64_t quota_rate;
859 int64_t rate ;
860 int max_try_count = 20;
861 int hit = 0;
862
863 /* Start vm for vcpu dirtylimit test */
864 vm = dirtylimit_start_vm();
865
866 /* Wait for the first serial output from the vm*/
867 wait_for_serial("vm_serial");
868
869 /* Do dirtyrate measurement with calc time equals 1s */
870 calc_dirty_rate(vm, 1);
871
872 /* Sleep calc time and wait for calc dirtyrate complete */
873 wait_for_calc_dirtyrate_complete(vm, 1);
874
875 /* Query original dirty page rate */
876 origin_rate = get_dirty_rate(vm);
877
878 /* VM booted from bootsect should dirty memory steadily */
879 assert(origin_rate != 0);
880
881 /* Setup quota dirty page rate at half of origin */
882 quota_rate = origin_rate / 2;
883
884 /* Set dirtylimit */
885 dirtylimit_set_all(vm, quota_rate);
886
887 /*
888 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit
889 * works literally
890 */
891 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
892
893 /* Sleep a bit to check if it take effect */
894 usleep(2000000);
895
896 /*
897 * Check if dirtylimit take effect realistically, set the
898 * timeout with 20 s(max_try_count * 1s), if dirtylimit
899 * doesn't take effect, fail test.
900 */
901 while (--max_try_count) {
902 calc_dirty_rate(vm, 1);
903 wait_for_calc_dirtyrate_complete(vm, 1);
904 rate = get_dirty_rate(vm);
905
906 /*
907 * Assume hitting if current rate is less
908 * than quota rate (within accepting error)
909 */
910 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
911 hit = 1;
912 break;
913 }
914 }
915
916 g_assert_cmpint(hit, ==, 1);
917
918 hit = 0;
919 max_try_count = 20;
920
921 /* Check if dirtylimit cancellation take effect */
922 cancel_vcpu_dirty_limit(vm);
923 while (--max_try_count) {
924 calc_dirty_rate(vm, 1);
925 wait_for_calc_dirtyrate_complete(vm, 1);
926 rate = get_dirty_rate(vm);
927
928 /*
929 * Assume dirtylimit be canceled if current rate is
930 * greater than quota rate (within accepting error)
931 */
932 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
933 hit = 1;
934 break;
935 }
936 }
937
938 g_assert_cmpint(hit, ==, 1);
939 dirtylimit_stop_vm(vm);
940 }
941
/*
 * Configure the source @from for a dirty limit migration and wait for its
 * serial output.
 *
 * @period is used for 'x-vcpu-dirty-limit-period' and @value for
 * 'vcpu-dirty-limit'.
 */
static void migrate_dirty_limit_wait_showup(QTestState *from,
                                            const int64_t period,
                                            const int64_t value)
{
    /* Turn the dirty limit capability on and set its parameters */
    migrate_set_capability(from, "dirty-limit", true);
    migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
    migrate_set_parameter_int(from, "vcpu-dirty-limit", value);

    /* The migration must not be able to converge */
    migrate_ensure_non_converge(from);

    /* Needed to check the limit rate after precopy */
    migrate_set_capability(from, "pause-before-switchover", true);

    /* The source has to produce serial output before we go on */
    wait_for_serial("src_serial");
}
962
/*
 * This test does:
 *  source                          destination
 *  start vm
 *                                  start incoming vm
 *  migrate
 *  wait dirty limit to begin
 *  cancel migrate
 *  cancellation check
 *                                  restart incoming vm
 *  migrate
 *  wait dirty limit to begin
 *  wait pre-switchover event
 *  convergence condition check
 *
 * And see if dirty limit migration works correctly.
 * This test case involves many passes, so it runs in slow mode only.
 */
test_dirty_limit(void)981 static void test_dirty_limit(void)
982 {
983 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
984 QTestState *from, *to;
985 int64_t remaining;
986 uint64_t throttle_us_per_full;
987 /*
988 * We want the test to be stable and as fast as possible.
989 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
990 * so we need to decrease a bandwidth.
991 */
992 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
993 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */
994 const int64_t downtime_limit = 250; /* 250ms */
995 /*
996 * We migrate through unix-socket (> 500Mb/s).
997 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
998 * So, we can predict expected_threshold
999 */
1000 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
1001 int max_try_count = 10;
1002 MigrateCommon args = {
1003 .start = {
1004 .hide_stderr = true,
1005 .use_dirty_ring = true,
1006 },
1007 .listen_uri = uri,
1008 .connect_uri = uri,
1009 };
1010
1011 /* Start src, dst vm */
1012 if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1013 return;
1014 }
1015
1016 /* Prepare for dirty limit migration and wait src vm show up */
1017 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
1018
1019 /* Start migrate */
1020 migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1021
1022 /* Wait for dirty limit throttle begin */
1023 throttle_us_per_full = 0;
1024 while (throttle_us_per_full == 0) {
1025 throttle_us_per_full =
1026 read_migrate_property_int(from,
1027 "dirty-limit-throttle-time-per-round");
1028 usleep(100);
1029 g_assert_false(get_src()->stop_seen);
1030 }
1031
1032 /* Now cancel migrate and wait for dirty limit throttle switch off */
1033 migrate_cancel(from);
1034 wait_for_migration_status(from, "cancelled", NULL);
1035
1036 /* destination always fails after cancel */
1037 migration_event_wait(to, "failed");
1038 qtest_set_expected_status(to, EXIT_FAILURE);
1039 qtest_quit(to);
1040
1041 /* Check if dirty limit throttle switched off, set timeout 1ms */
1042 do {
1043 throttle_us_per_full =
1044 read_migrate_property_int(from,
1045 "dirty-limit-throttle-time-per-round");
1046 usleep(100);
1047 g_assert_false(get_src()->stop_seen);
1048 } while (throttle_us_per_full != 0 && --max_try_count);
1049
1050 /* Assert dirty limit is not in service */
1051 g_assert_cmpint(throttle_us_per_full, ==, 0);
1052
1053 args = (MigrateCommon) {
1054 .start = {
1055 .only_target = true,
1056 .use_dirty_ring = true,
1057 },
1058 .listen_uri = uri,
1059 .connect_uri = uri,
1060 };
1061
1062 /* Restart dst vm, src vm already show up so we needn't wait anymore */
1063 if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1064 return;
1065 }
1066
1067 /* Start migrate */
1068 migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1069
1070 /* Wait for dirty limit throttle begin */
1071 throttle_us_per_full = 0;
1072 while (throttle_us_per_full == 0) {
1073 throttle_us_per_full =
1074 read_migrate_property_int(from,
1075 "dirty-limit-throttle-time-per-round");
1076 usleep(100);
1077 g_assert_false(get_src()->stop_seen);
1078 }
1079
1080 /*
1081 * The dirty limit rate should equals the return value of
1082 * query-vcpu-dirty-limit if dirty limit cap set
1083 */
1084 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from));
1085
1086 /* Now, we have tested if dirty limit works, let it converge */
1087 migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
1088 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
1089
1090 /*
1091 * Wait for pre-switchover status to check if migration
1092 * satisfy the convergence condition
1093 */
1094 wait_for_migration_status(from, "pre-switchover", NULL);
1095
1096 remaining = read_ram_property_int(from, "remaining");
1097 g_assert_cmpint(remaining, <,
1098 (expected_threshold + expected_threshold / 100));
1099
1100 migrate_continue(from, "pre-switchover");
1101
1102 qtest_qmp_eventwait(to, "RESUME");
1103
1104 wait_for_serial("dest_serial");
1105 wait_for_migration_complete(from);
1106
1107 migrate_end(from, to, true);
1108 }
1109
/*
 * Register the smoke subset of the precopy tests.
 *
 * Entries are registered in table order.  Entries flagged x86_only are
 * skipped unless env->is_x86 is set.
 */
static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
{
    static const struct {
        bool x86_only;
        const char *path;
        void (*fn)(void);
    } smoke_tests[] = {
        {
            .x86_only = true,
            .path = "/migration/precopy/unix/suspend/live",
            .fn = test_precopy_unix_suspend_live,
        }, {
            .x86_only = true,
            .path = "/migration/precopy/unix/suspend/notlive",
            .fn = test_precopy_unix_suspend_notlive,
        }, {
            .path = "/migration/precopy/unix/plain",
            .fn = test_precopy_unix_plain,
        }, {
            .path = "/migration/precopy/tcp/plain",
            .fn = test_precopy_tcp_plain,
        }, {
            .path = "/migration/multifd/tcp/uri/plain/none",
            .fn = test_multifd_tcp_uri_none,
        }, {
            .path = "/migration/multifd/tcp/plain/cancel",
            .fn = test_multifd_tcp_cancel,
        },
    };
    const size_t nr_tests = sizeof(smoke_tests) / sizeof(smoke_tests[0]);

    for (size_t idx = 0; idx < nr_tests; idx++) {
        if (smoke_tests[idx].x86_only && !env->is_x86) {
            continue;
        }
        migration_test_add(smoke_tests[idx].path, smoke_tests[idx].fn);
    }
}
1128
/*
 * Register the precopy migration tests.
 *
 * The smoke subset is always registered.  Everything else is registered
 * only when env->full_set is set, and some of those tests are further
 * gated on g_test_slow(), on not building for _WIN32, or on an x86_64
 * guest with KVM and dirty ring support.
 */
void migration_test_add_precopy(MigrationTestEnv *env)
{
    bool x86_dirty_ring;

    /* The tests in this file build their unix socket URIs from tmpfs */
    tmpfs = env->tmpfs;

    migration_test_add_precopy_smoke(env);

    if (!env->full_set) {
        return;
    }

    /* Shared precondition of all the dirty ring / dirty limit tests */
    x86_dirty_ring = g_str_equal(env->arch, "x86_64") &&
                     env->has_kvm && env->has_dirty_ring;

    migration_test_add("/migration/precopy/tcp/plain/switchover-ack",
                       test_precopy_tcp_switchover_ack);

#ifndef _WIN32
    migration_test_add("/migration/precopy/fd/tcp",
                       test_precopy_fd_socket);
    migration_test_add("/migration/precopy/fd/file",
                       test_precopy_fd_file);
#endif

    /*
     * See the definition of test_auto_converge for an explanation of
     * why this test is only run in slow mode.
     */
    if (g_test_slow()) {
        migration_test_add("/migration/auto_converge",
                           test_auto_converge);
        if (x86_dirty_ring) {
            migration_test_add("/migration/dirty_limit",
                               test_dirty_limit);
        }
    }
    migration_test_add("/migration/multifd/tcp/channels/plain/none",
                       test_multifd_tcp_channels_none);
    migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
                       test_multifd_tcp_zero_page_legacy);
    migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
                       test_multifd_tcp_no_zero_page);
    if (x86_dirty_ring) {
        migration_test_add("/migration/dirty_ring",
                           test_precopy_unix_dirty_ring);
        if (qtest_has_machine("pc") && g_test_slow()) {
            migration_test_add("/migration/vcpu_dirty_limit",
                               test_vcpu_dirty_limit);
        }
    }

    /*
     * Make sure a newly added migration status does not go unnoticed:
     * this trips when MIGRATION_STATUS__MAX changes, so the list of
     * skipped statuses below has to be revisited.
     */
    assert(MIGRATION_STATUS__MAX == 15);

    /* One cancel test per migration status, minus the ones skipped below */
    for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
        switch (i) {
        case MIGRATION_STATUS_DEVICE: /* happens too fast */
        case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */
        case MIGRATION_STATUS_COLO: /* no support in tests */
        case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
        case MIGRATION_STATUS_POSTCOPY_PAUSED:
        case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
        case MIGRATION_STATUS_POSTCOPY_RECOVER:
            continue;
        default:
            migration_test_add_suffix("/migration/cancel/src/after/",
                                      MigrationStatus_str(i),
                                      test_cancel_src_after_status);
            break;
        }
    }
}
1198